# xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/override.t (revision 91f110e064cd7c194e59e019b83bb7496c1c84d4)

# Compile-time guard, placed ahead of every `use` line in this file.
BEGIN {
    # Emit a "1..0" plan and stop when pack('U', ...) does not produce the
    # expected character for a code point.
    unless ("A" eq pack('U', 0x41)) {
        print "1..0 # Unicode::Collate " .
            "cannot stringify a Unicode code point\n";
        exit 0;
    }
    # With PERL_CORE set: move into t/ when it exists and replace @INC
    # with the relative lib directory (MacOS uses the '::lib' spelling).
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use strict;
use warnings;

# Unbuffer STDOUT and print the plan (35 tests) at compile time.
BEGIN { $| = 1; print "1..35\n"; }

# Number of the most recently reported test; incremented by ok().
my $count = 0;

# ok(RESULT)
#     Passes when RESULT is true.
# ok(RESULT, EXPECTED)
#     Passes when both values are undef, or when both are defined and
#     string-equal (eq). Exactly one undef side is a failure.
# Each call prints one "ok N" or "not ok N" line.
sub ok ($;$) {
    my $p = my $r = shift;
    if (@_) {
        my $x = shift;
        $p = !defined $x ? !defined $r
           : !defined $r ? 0
           :               $r eq $x;
    }
    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
}

use Unicode::Collate;

# Test 1: only reached when the module above has loaded.
ok(1);

#########################

##### 2..6

# Collator built with no table, no normalization and no CJK/Hangul
# override callbacks, with UCA_Version set to 8.
my $all_undef_8 = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideCJK => undef,
  overrideHangul => undef,
  UCA_Version => 8,
);

# All in the Unicode code point order.
# No hangul decomposition.

ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));


##### 7..11

# Same construction as the UCA_Version 8 collator, but with
# UCA_Version set to 9.
my $all_undef_9 = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideCJK => undef,
  overrideHangul => undef,
  UCA_Version => 9,
);

# CJK Ideo. < CJK ext A/B < Others.
# No hangul decomposition.

ok($all_undef_9->lt("\x{4E00}", "\x{3402}"));
ok($all_undef_9->lt("\x{3402}", "\x{20000}"));
ok($all_undef_9->lt("\x{20000}", "\x{AC00}"));
ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}"));
ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); # U+ABFF: not assigned

##### 12..16

# overrideHangul returns an empty list for every code point it is given;
# U+AE00 alone is given an explicit entry via the heredoc below.
my $ignoreHangul = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideHangul => sub {()},
  entry => <<'ENTRIES',
AE00 ; [.0100.0020.0002.AE00]  # Hangul GEUL
ENTRIES
);

# All Hangul Syllables except U+AE00 are ignored.

ok($ignoreHangul->eq("\x{AC00}", ""));
ok($ignoreHangul->lt("\x{AC00}", "\0"));
ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}"));
ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored.
ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.

##### 17..21

# overrideHangul returns 0x100 for U+AE00 and undef for any other code
# point it is given.
my $undefHangul = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideHangul => sub {
    my $u = shift;
    return $u == 0xAE00 ? 0x100 : undef;
  }
);

# All Hangul Syllables except U+AE00 are undefined.

ok($undefHangul->lt("\x{AE00}", "r"));
ok($undefHangul->gt("\x{AC00}", "r"));
ok($undefHangul->gt("\x{AC00}", "\x{1100}\x{1161}"));
ok($undefHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.
ok($undefHangul->lt("\x{AC00}", "\x{B000}"));

##### 22..25

# overrideCJK returns 0x100 for U+4E00 and undef for any other code
# point it is given.
my $undefCJK = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideCJK => sub {
    my $u = shift;
    return $u == 0x4E00 ? 0x100 : undef;
  }
);

# All CJK Ideographs except U+4E00 are undefined.

ok($undefCJK->lt("\x{4E00}", "r"));
ok($undefCJK->lt("\x{5000}", "r")); # still CJK < unassigned
ok($undefCJK->lt("Pe\x{4E00}rl", "Perl")); # 'r' is unassigned.
ok($undefCJK->lt("\x{5000}", "\x{6000}"));

##### 26..30

# overrideHangul hands back the code point it was given, unchanged.
my $cpHangul = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideHangul => sub { shift }
);

ok($cpHangul->lt("\x{AC00}", "\x{AC01}"));
ok($cpHangul->lt("\x{AC01}", "\x{D7A3}"));
ok($cpHangul->lt("\x{D7A3}", "r")); # 'r' is unassigned.
ok($cpHangul->lt("r", "\x{D7A4}"));
ok($cpHangul->lt("\x{D7A3}", "\x{4E00}"));

##### 31..35

# overrideHangul returns an array reference of four numbers built from
# the code point: [cp, 0x20, 0x2, cp].
# NOTE(review): this looks like the same four fields as a table entry's
# weights -- confirm against the Unicode::Collate documentation.
my $arrayHangul = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideHangul => sub {
    my $u = shift;
    return [$u, 0x20, 0x2, $u];
  }
);

ok($arrayHangul->lt("\x{AC00}", "\x{AC01}"));
ok($arrayHangul->lt("\x{AC01}", "\x{D7A3}"));
ok($arrayHangul->lt("\x{D7A3}", "r")); # 'r' is unassigned.
ok($arrayHangul->lt("r", "\x{D7A4}"));
ok($arrayHangul->lt("\x{D7A3}", "\x{4E00}"));

