xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/index.t (revision 46035553bfdd96e63c94e32da0210227ec2e3cf1)
1
2BEGIN {
3    unless ('A' eq pack('U', 0x41)) {
4	print "1..0 # Unicode::Collate cannot pack a Unicode code point\n";
5	exit 0;
6    }
7    unless (0x41 == unpack('U', 'A')) {
8	print "1..0 # Unicode::Collate cannot get a Unicode code point\n";
9	exit 0;
10    }
11    if ($ENV{PERL_CORE}) {
12	chdir('t') if -d 't';
13	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
14    }
15}
16
17use strict;
18use warnings;
19BEGIN { $| = 1; print "1..91\n"; }
20my $count = 0;
21sub ok ($;$) {
22    my $p = my $r = shift;
23    if (@_) {
24	my $x = shift;
25	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
26    }
27    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
28}
29
30use Unicode::Collate;
31
32ok(1);
33
34our $IsEBCDIC = ord("A") != 0x41;
35
36my $Collator = Unicode::Collate->new(
37  table => 'keys.txt',
38  normalization => undef,
39);
40
41##### 1
42
43my %old_level = $Collator->change(level => 2);
44
45my $str;
46
47my $orig = "This is a Perl book.";
48my $sub = "PERL";
49my $rep = "camel";
50my $ret = "This is a camel book.";
51
52$str = $orig;
53if (my($pos,$len) = $Collator->index($str, $sub)) {
54  substr($str, $pos, $len, $rep);
55}
56
57ok($str, $ret);
58
59$Collator->change(%old_level);
60
61$str = $orig;
62if (my($pos,$len) = $Collator->index($str, $sub)) {
63  substr($str, $pos, $len, $rep);
64}
65
66ok($str, $orig);
67
68##### 3
69
70my $match;
71
72$Collator->change(level => 1);
73
74$str = "Pe\x{300}rl";
75$sub = "pe";
76$ret = "Pe\x{300}";
77$match = undef;
78if (my($pos, $len) = $Collator->index($str, $sub)) {
79    $match = substr($str, $pos, $len);
80}
81ok($match, $ret);
82
83$str = "P\x{300}e\x{300}\x{301}\x{303}rl";
84$sub = "pE";
85$ret = "P\x{300}e\x{300}\x{301}\x{303}";
86$match = undef;
87if (my($pos, $len) = $Collator->index($str, $sub)) {
88    $match = substr($str, $pos, $len);
89}
90ok($match, $ret);
91
92$Collator->change(level => 2);
93
94$str = "Pe\x{300}rl";
95$sub = "pe";
96$ret = undef;
97$match = undef;
98if (my($pos, $len) = $Collator->index($str, $sub)) {
99    $match = substr($str, $pos, $len);
100}
101ok($match, $ret);
102
103$str = "P\x{300}e\x{300}\x{301}\x{303}rl";
104$sub = "pE";
105$ret = undef;
106$match = undef;
107if (my($pos, $len) = $Collator->index($str, $sub)) {
108    $match = substr($str, $pos, $len);
109}
110ok($match, $ret);
111
112$str = "Pe\x{300}rl";
113$sub = "pe\x{300}";
114$ret = "Pe\x{300}";
115$match = undef;
116if (my($pos, $len) = $Collator->index($str, $sub)) {
117    $match = substr($str, $pos, $len);
118}
119ok($match, $ret);
120
121$str = "P\x{300}e\x{300}\x{301}\x{303}rl";
122$sub = "p\x{300}E\x{300}\x{301}\x{303}";
123$ret = "P\x{300}e\x{300}\x{301}\x{303}";
124$match = undef;
125if (my($pos, $len) = $Collator->index($str, $sub)) {
126    $match = substr($str, $pos, $len);
127}
128ok($match, $ret);
129
130##### 9
131
132$Collator->change(level => 1);
133
134$str = $IsEBCDIC
135    ? "Ich mu\x{0059} studieren Perl."
136    : "Ich mu\x{00DF} studieren Perl.";
137$sub = $IsEBCDIC
138    ? "m\x{00DC}ss"
139    : "m\x{00FC}ss";
140$ret = $IsEBCDIC
141    ? "mu\x{0059}"
142    : "mu\x{00DF}";
143$match = undef;
144if (my($pos, $len) = $Collator->index($str, $sub)) {
145    $match = substr($str, $pos, $len);
146}
147ok($match, $ret);
148
149$Collator->change(%old_level);
150
151$match = undef;
152if (my($pos, $len) = $Collator->index($str, $sub)) {
153    $match = substr($str, $pos, $len);
154}
155ok($match, undef);
156
157$match = undef;
158if (my($pos,$len) = $Collator->index("", "")) {
159    $match = substr("", $pos, $len);
160}
161ok($match, "");
162
163$match = undef;
164if (my($pos,$len) = $Collator->index("", "abc")) {
165    $match = substr("", $pos, $len);
166}
167ok($match, undef);
168
169##### 13
170
171$Collator->change(level => 1);
172
173$str = "\0\cA\0\cAe\0\x{300}\cA\x{301}\cB\x{302}\0 \0\cA";
174$sub = "e";
175$ret = "e\0\x{300}\cA\x{301}\cB\x{302}\0";
176$match = undef;
177if (my($pos, $len) = $Collator->index($str, $sub)) {
178    $match = substr($str, $pos, $len);
179}
180ok($match, $ret);
181
182$Collator->change(level => 1);
183
184$str = "\0\cA\0\cAe\0\cA\x{300}\0\cAe";
185$sub = "e";
186$ret = "e\0\cA\x{300}\0\cA";
187$match = undef;
188if (my($pos, $len) = $Collator->index($str, $sub)) {
189    $match = substr($str, $pos, $len);
190}
191ok($match, $ret);
192
193
194$Collator->change(%old_level);
195
196$str = "e\x{300}";
197$sub = "e";
198$ret = undef;
199$match = undef;
200if (my($pos, $len) = $Collator->index($str, $sub)) {
201    $match = substr($str, $pos, $len);
202}
203ok($match, $ret);
204
205##### 16
206
207$Collator->change(level => 1);
208
209$str = "The Perl is a language, and the perl is an interpreter.";
210$sub = "PERL";
211
212$match = undef;
213if (my($pos, $len) = $Collator->index($str, $sub, -40)) {
214    $match = substr($str, $pos, $len);
215}
216ok($match, "Perl");
217
218$match = undef;
219if (my($pos, $len) = $Collator->index($str, $sub, 4)) {
220    $match = substr($str, $pos, $len);
221}
222ok($match, "Perl");
223
224$match = undef;
225if (my($pos, $len) = $Collator->index($str, $sub, 5)) {
226    $match = substr($str, $pos, $len);
227}
228ok($match, "perl");
229
230$match = undef;
231if (my($pos, $len) = $Collator->index($str, $sub, 32)) {
232    $match = substr($str, $pos, $len);
233}
234ok($match, "perl");
235
236$match = undef;
237if (my($pos, $len) = $Collator->index($str, $sub, 33)) {
238    $match = substr($str, $pos, $len);
239}
240ok($match, undef);
241
242$match = undef;
243if (my($pos, $len) = $Collator->index($str, $sub, 100)) {
244    $match = substr($str, $pos, $len);
245}
246ok($match, undef);
247
248$Collator->change(%old_level);
249
250##### 22
251
252my @ret;
253
254$Collator->change(level => 1);
255
256$ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
257ok($ret);
258ok($$ret eq "P\cBe\x{300}\cB");
259
260@ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
261ok($ret[0], "P\cBe\x{300}\cB");
262
263$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}";
264$sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss";
265
266($ret) = $Collator->match($str, $sub);
267ok($ret, $str);
268
269$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}";
270$sub = $IsEBCDIC ? "m\x{00DC}s" : "m\x{00FC}s";
271
272($ret) = $Collator->match($str, $sub);
273ok($ret, undef);
274
275$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
276ok($ret eq "P\cBe\x{300}\cB:pe:PE");
277
278$ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
279ok($ret == 3);
280
281$str = "ABCDEF";
282$sub = "cde";
283$ret = $Collator->match($str, $sub);
284$str = "01234567";
285ok($ret && $$ret, "CDE");
286
287$str = "ABCDEF";
288$sub = "cde";
289($ret) = $Collator->match($str, $sub);
290$str = "01234567";
291ok($ret, "CDE");
292
293
294$Collator->change(level => 3);
295
296$ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
297ok($ret, undef);
298
299@ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe");
300ok(@ret == 0);
301
302$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe");
303ok($ret eq "");
304
305$ret = $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe");
306ok($ret == 0);
307
308$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
309ok($ret eq "pe");
310
311$ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe");
312ok($ret == 1);
313
314$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}";
315$sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss";
316
317($ret) = $Collator->match($str, $sub);
318ok($ret, undef);
319
320$Collator->change(%old_level);
321
322##### 38
323
324$Collator->change(level => 1);
325
326sub strreverse { scalar reverse shift }
327
328$str = "P\cBe\x{300}\cBrl and PERL.";
329$ret = $Collator->subst($str, "perl", 'Camel');
330ok($ret, 1);
331ok($str, "Camel and PERL.");
332
333$str = "P\cBe\x{300}\cBrl and PERL.";
334$ret = $Collator->subst($str, "perl", \&strreverse);
335ok($ret, 1);
336ok($str, "lr\cB\x{300}e\cBP and PERL.");
337
338$str = "P\cBe\x{300}\cBrl and PERL.";
339$ret = $Collator->gsubst($str, "perl", 'Camel');
340ok($ret, 2);
341ok($str, "Camel and Camel.");
342
343$str = "P\cBe\x{300}\cBrl and PERL.";
344$ret = $Collator->gsubst($str, "perl", \&strreverse);
345ok($ret, 2);
346ok($str, "lr\cB\x{300}e\cBP and LREP.");
347
348$str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L...";
349$Collator->gsubst($str, "camel", sub { "<b>$_[0]</b>" });
350ok($str, "<b>Camel</b> donkey zebra <b>came\x{301}l</b> "
351	. "<b>CAMEL</b> horse <b>cAm\0E\0L</b>...");
352
353##### 47
354
355# http://www.xray.mpe.mpg.de/mailing-lists/perl-unicode/2010-09/msg00014.html
356# when the substring includes an ignorable element like a space...
357
358$str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L...";
359$Collator->gsubst($str, "camel horse", sub { "<b>$_[0]</b>" });
360ok($str, "Camel donkey zebra came\x{301}l <b>CAMEL horse</b> cAm\0E\0L...");
361
362$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse...";
363$Collator->gsubst($str, "camel horse", sub { "=$_[0]=" });
364ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=...");
365
366$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse...";
367$Collator->gsubst($str, "camel-horse", sub { "=$_[0]=" });
368ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=...");
369
370$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse...";
371$Collator->gsubst($str, "camelhorse", sub { "=$_[0]=" });
372ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=...");
373
374$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse...";
375$Collator->gsubst($str, "  ca  mel  hor  se  ", sub { "=$_[0]=" });
376ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=...");
377
378$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse...";
379$Collator->gsubst($str, "ca\x{300}melho\x{302}rse", sub { "=$_[0]=" });
380ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=...");
381
382##### 53
383
384$Collator->change(level => 3);
385
386$str = "P\cBe\x{300}\cBrl and PERL.";
387$ret = $Collator->subst($str, "perl", "Camel");
388ok(! $ret);
389ok($str, "P\cBe\x{300}\cBrl and PERL.");
390
391$str = "P\cBe\x{300}\cBrl and PERL.";
392$ret = $Collator->subst($str, "perl", \&strreverse);
393ok(! $ret);
394ok($str, "P\cBe\x{300}\cBrl and PERL.");
395
396$str = "P\cBe\x{300}\cBrl and PERL.";
397$ret = $Collator->gsubst($str, "perl", "Camel");
398ok($ret, 0);
399ok($str, "P\cBe\x{300}\cBrl and PERL.");
400
401$str = "P\cBe\x{300}\cBrl and PERL.";
402$ret = $Collator->gsubst($str, "perl", \&strreverse);
403ok($ret, 0);
404ok($str, "P\cBe\x{300}\cBrl and PERL.");
405
406$Collator->change(%old_level);
407
408##### 61
409
410$str = "Perl and Camel";
411$ret = $Collator->gsubst($str, "\cA\cA\0", "AB");
412ok($ret, 15);
413ok($str, "ABPABeABrABlAB ABaABnABdAB ABCABaABmABeABlAB");
414
415$str = '';
416$ret = $Collator->subst($str, "", "ABC");
417ok($ret, 1);
418ok($str, "ABC");
419
420$str = '';
421$ret = $Collator->gsubst($str, "", "ABC");
422ok($ret, 1);
423ok($str, "ABC");
424
425$str = 'PPPPP';
426$ret = $Collator->gsubst($str, 'PP', "ABC");
427ok($ret, 2);
428ok($str, "ABCABCP");
429
430##### 69
431
432# Shifted; ignorable after variable
433
434($ret) = $Collator->match("A?\x{300}!\x{301}\x{344}B\x{315}", "?!");
435ok($ret, "?\x{300}!\x{301}\x{344}");
436
437$Collator->change(alternate => 'Non-ignorable');
438
439($ret) = $Collator->match("A?\x{300}!\x{301}B\x{315}", "?!");
440ok($ret, undef);
441
442##### 71
443
444# Now preprocess is defined.
445
446$Collator->change(preprocess => sub {''});
447
448eval { $Collator->index("", "") };
449ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
450
451eval { $Collator->index("a", "a") };
452ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
453
454eval { $Collator->match("", "") };
455ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
456
457eval { $Collator->match("a", "a") };
458ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
459
460$Collator->change(preprocess => sub { uc shift });
461
462eval { $Collator->index("", "") };
463ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
464
465eval { $Collator->index("a", "a") };
466ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
467
468eval { $Collator->match("", "") };
469ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
470
471eval { $Collator->match("a", "a") };
472ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
473
474##### 79
475
476eval { require Unicode::Normalize };
477my $has_norm = !$@;
478
479if ($has_norm) {
480    # Now preprocess and normalization are defined.
481
482    $Collator->change(normalization => 'NFD');
483
484    eval { $Collator->index("", "") };
485    ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
486
487    eval { $Collator->index("a", "a") };
488    ok($@ && $@ =~ /Don't use Preprocess with index\(\)/);
489
490    eval { $Collator->match("", "") };
491    ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
492
493    eval { $Collator->match("a", "a") };
494    ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/);
495} else {
496    ok(1) for 1..4;
497}
498
499$Collator->change(preprocess => undef);
500
501if ($has_norm) {
502    # Now only normalization is defined.
503
504    eval { $Collator->index("", "") };
505    ok($@ && $@ =~ /Don't use Normalization with index\(\)/);
506
507    eval { $Collator->index("a", "a") };
508    ok($@ && $@ =~ /Don't use Normalization with index\(\)/);
509
510    eval { $Collator->match("", "") };
511    ok($@ && $@ =~ /Don't use Normalization with.*match\(\)/);
512
513    eval { $Collator->match("a", "a") };
514    ok($@ && $@ =~ /Don't use Normalization with.*match\(\)/);
515
516    $Collator->change(normalization => undef);
517} else {
518    ok(1) for 1..4;
519}
520
521##### 87
522
523# Now preprocess and normalization are undef.
524
525eval { $Collator->index("", "") };
526ok(!$@);
527
528eval { $Collator->index("a", "a") };
529ok(!$@);
530
531eval { $Collator->match("", "") };
532ok(!$@);
533
534eval { $Collator->match("a", "a") };
535ok(!$@);
536
537##### 91
538