xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/rewrite.t (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1
# Compile-time setup.
#  * Skip the whole test file when pack('U', 0x41) does not yield "A".
#  * When PERL_CORE is set, move into t/ (if that directory exists) and
#    replace @INC with the library path used for in-tree runs.
BEGIN {
    if (pack('U', 0x41) ne "A") {
        # A plan of "1..0 # reason" tells the harness every test is skipped.
        my $skip_plan = "1..0 # Unicode::Collate " .
            "cannot stringify a Unicode code point\n";
        print $skip_plan;
        exit 0;
    }

    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }

        # MacOS gets '::lib'; every other platform gets '../lib'.
        if ($^O eq 'MacOS') {
            @INC = qw(::lib);
        }
        else {
            @INC = qw(../lib);
        }
    }
}
13
14use strict;
15use warnings;
# At compile time: enable autoflush on the selected output handle and
# announce the plan of 17 tests.
BEGIN {
    $| = 1;
    print "1..17\n";
}
# Running number of the last test result printed by ok().
my $count = 0;

# ok(GOT)
# ok(GOT, EXPECTED)
#
# Prints one result line, "ok N" or "not ok N", where N is the next test
# number.
#  * One argument: the test passes when GOT is true.
#  * Two arguments: the test passes when GOT and EXPECTED are both
#    undefined, or both defined and equal under string comparison (eq).
sub ok ($;$) {
    my $got  = shift;
    my $pass = $got;

    if (@_) {
        my $expected = shift;
        if (!defined $expected) {
            # An undefined expectation is only met by an undefined value.
            $pass = !defined $got;
        }
        elsif (!defined $got) {
            $pass = 0;
        }
        else {
            $pass = $got eq $expected;
        }
    }

    my $status = $pass ? "ok" : "not ok";
    print $status, ' ', ++$count, "\n";
}
26
27use Unicode::Collate;
28
# Test 1: unconditional pass. Execution only reaches this statement if the
# preceding "use Unicode::Collate" succeeded at compile time.
ok(1);
30
31#########################
32
# Rewrite callback handed to Unicode::Collate->new(rewrite => ...).
#
# Takes one line of text and returns it with every "[.0000.XXXX.XXXX."
# prefix (first field 0000, followed by two four-character fields) changed
# to "[.0000.0000.0000.".  Lines without such a prefix come back unchanged.
my $code = sub {
    my ($entry) = @_;
    $entry =~ s/\[\.0000\..{4}\..{4}\./[.0000.0000.0000./g;
    return $entry;
};
38
39#####
40
# Collator built from keys.txt with normalization disabled and every table
# line passed through the $code rewrite callback.
my $Collator = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    rewrite       => $code,
);

# Tests 2-4: "camel" is expected to equal its spellings that contain
# U+0300 / U+0301, and to sort before "Camel".
{
    my $plain = "camel";
    ok($Collator->eq($plain, "came\x{300}l"));
    ok($Collator->eq($plain, "ca\x{300}me\x{301}l"));
    ok($Collator->lt($plain, "Camel"));
}

# Test 5: gsubst() edits its first argument in place.  The expected string
# has "came\x{301}l" and "cam\0e\0l" wrapped in "=", while "Camel" and
# "CAMEL" stay untouched.
{
    my $text = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l.";
    my $want = "Camel donkey zebra =came\x{301}l= CAMEL horse =cam\0e\0l=.";
    my $wrap = sub { "=$_[0]=" };

    $Collator->gsubst($text, "camel", $wrap);
    ok($text, $want);
}
53
54# 5
55
# Same checks as for the keys.txt collator, but with no "table" argument,
# so the constructor falls back to whatever table it uses by default
# (presumably the DUCET, going by the variable name -- confirm against the
# Unicode::Collate documentation).
my $rewriteDUCET = Unicode::Collate->new(
    normalization => undef,
    rewrite       => $code,
);

# Tests 6-8: each row is [ comparison method, left string, right string ].
for my $check (
    [ eq => "camel", "came\x{300}l" ],
    [ eq => "camel", "ca\x{300}me\x{301}l" ],
    [ lt => "camel", "Camel" ],
) {
    my ($method, $left, $right) = @$check;
    ok($rewriteDUCET->$method($left, $right));
}

# Test 9: in-place substitution; only the lowercase "camel" variants are
# expected to be wrapped in "=".
{
    my $text = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l.";
    my $want = "Camel donkey zebra =came\x{301}l= CAMEL horse =cam\0e\0l=.";

    $rewriteDUCET->gsubst($text, "camel", sub { "=$_[0]=" });
    ok($text, $want);
}
68
69# 9
70
# Level-1 collator over keys.txt whose rewrite callback returns an empty
# string for every table line that mentions HIRAGANA and passes all other
# lines through unchanged.
my $undef_hira = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    level         => 1,
    rewrite       => sub {
        my ($entry) = @_;
        return $entry =~ /HIRAGANA/ ? '' : $entry;
    },
);

my $hiragana = "\x{3042}\x{3044}";
my $katakana = "\x{30A2}\x{30A4}";
my $cjkkanji = "\x{4E00}";

# The rewrite leaves the hiragana characters undefined in the table, so
# they are expected to sort after the CJK Unified Ideographs.

# Tests 10-14: in every pair the first string must sort before the second.
for my $pair (
    [ "abc",     "perl"    ],
    [ "",        "ABC"     ],
    [ $katakana, $hiragana ],
    [ $katakana, $cjkkanji ],
    [ $cjkkanji, $hiragana ],
) {
    my ($lesser, $greater) = @$pair;
    ok($undef_hira->lt($lesser, $greater));
}
94
# Tests 15-17: switch $Collator to level 1 and repeat the kana/kanji
# comparisons.  Its rewrite callback keeps the HIRAGANA lines, and the
# expectations here are that katakana equals hiragana and both sort
# before the kanji.
$Collator->change(level => 1);

my $kana_are_equal        = $Collator->eq($katakana, $hiragana);
ok($kana_are_equal);

my $katakana_before_kanji = $Collator->lt($katakana, $cjkkanji);
ok($katakana_before_kanji);

my $kanji_after_hiragana  = $Collator->gt($cjkkanji, $hiragana);
ok($kanji_after_hiragana);
99
100# 17
101