# xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/override.t (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
# Tests for the overrideCJK and overrideHangul options of Unicode::Collate.
#
# Every collator below is built with table => undef and
# normalization => undef, so the only explicit weights are the ones
# given through the 'entry' option; everything else depends on the
# override hooks and on the selected UCA_Version.

BEGIN {
    # If pack('U', ...) does not produce the expected character, emit an
    # empty plan ("1..0") with the reason and stop before any test runs.
    unless ("A" eq pack('U', 0x41)) {
        print "1..0 # Unicode::Collate " .
            "cannot stringify a Unicode code point\n";
        exit 0;
    }
    # With PERL_CORE set, run from the t/ directory (when it exists) and
    # load modules from the lib directory relative to it.
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 76 };

use strict;
use warnings;
use Unicode::Collate;

# Test 1: reaching this point means the module was loaded.
ok(1);

##### 2..6
# No overrides, UCA_Version 8.

my $all_undef_8 = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideCJK => undef,
  overrideHangul => undef,
  UCA_Version => 8,
);

# All in the Unicode code point order.
# No hangul decomposition: the syllable U+AC00 does not compare equal to
# the jamo sequence U+1100 U+1161.

ok($all_undef_8->lt("\x{3402}", "\x{4E00}"));
ok($all_undef_8->lt("\x{4DFF}", "\x{4E00}"));
ok($all_undef_8->lt("\x{4E00}", "\x{AC00}"));
ok($all_undef_8->gt("\x{AC00}", "\x{1100}\x{1161}"));
ok($all_undef_8->gt("\x{AC00}", "\x{ABFF}"));


##### 7..11
# No overrides, UCA_Version 9.

my $all_undef_9 = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideCJK => undef,
  overrideHangul => undef,
  UCA_Version => 9,
);

# CJK Ideo. < CJK ext A/B < Others.
# No hangul decomposition.

ok($all_undef_9->lt("\x{4E00}", "\x{3402}"));
ok($all_undef_9->lt("\x{3402}", "\x{20000}"));
ok($all_undef_9->lt("\x{20000}", "\x{AC00}"));
ok($all_undef_9->gt("\x{AC00}", "\x{1100}\x{1161}"));
ok($all_undef_9->gt("\x{AC00}", "\x{ABFF}")); # U+ABFF: not assigned

##### 12..16
# overrideHangul returning an empty list: Hangul syllables without an
# explicit entry are ignored.

my $ignoreHangul = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideHangul => sub {()},
  entry => <<'ENTRIES',
AE00 ; [.0100.0020.0002.AE00]  # Hangul GEUL
ENTRIES
);

# All Hangul Syllables except U+AE00 are ignored.

ok($ignoreHangul->eq("\x{AC00}", ""));
ok($ignoreHangul->lt("\x{AC00}", "\0"));
ok($ignoreHangul->lt("\x{AC00}", "\x{AE00}"));
ok($ignoreHangul->lt("\x{AC00}", "\x{1100}\x{1161}")); # Jamo are not ignored.
ok($ignoreHangul->lt("Pe\x{AE00}rl", "Perl")); # 'r' is unassigned.


##### 17..21
# overrideCJK returning an empty list: CJK unified ideographs without an
# explicit entry are ignored.

my $ignoreCJK = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  overrideCJK => sub {()},
  entry => <<'ENTRIES',
5B57 ; [.0107.0020.0002.5B57]  # CJK Ideograph "Letter"
ENTRIES
);

# All CJK Unified Ideographs except U+5B57 are ignored.

ok($ignoreCJK->eq("\x{4E00}", ""));
ok($ignoreCJK->lt("\x{4E00}", "\0"));
ok($ignoreCJK->eq("Pe\x{4E00}rl", "Perl")); # U+4E00 is a CJK.
ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK.
ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned.

##### 22..29
# Default UCA_Version: which code points count as unified ideographs
# (ignored, so equal to "") and which do not (collating after "Perl").
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));

##### 30..37
# UCA_Version 9: U+9FA6 and U+9FBB are expected not to be ignored.
$ignoreCJK->change(UCA_Version => 9);
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->gt("\x{9FA6}", "Perl"));
ok($ignoreCJK->gt("\x{9FBB}", "Perl"));
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));

##### 38..45
# UCA_Version 8: same expectations as for UCA_Version 9 above.
$ignoreCJK->change(UCA_Version => 8);
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->gt("\x{9FA6}", "Perl"));
ok($ignoreCJK->gt("\x{9FBB}", "Perl"));
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));

##### 46..53
# UCA_Version 14: same expectations as the default-version run (22..29).
$ignoreCJK->change(UCA_Version => 14);
ok($ignoreCJK->eq("\x{3400}", ""));
ok($ignoreCJK->eq("\x{4DB5}", ""));
ok($ignoreCJK->eq("\x{9FA5}", ""));
ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
ok($ignoreCJK->eq("\x{20000}", ""));
ok($ignoreCJK->eq("\x{2A6D6}", ""));

##### 54..76
# overrideCJK returning a collation element: the primary weight is
# 0xFFFF minus the code point, so among overridden ideographs a larger
# code point gets a smaller primary weight.
# 'a' and 'A' share primary and secondary weights and differ only at
# level 3, so in the mixed-case comparisons below the ideograph decides.
my $overCJK = Unicode::Collate->new(
  table => undef,
  normalization => undef,
  entry => <<'ENTRIES',
0061 ; [.0101.0020.0002.0061] # latin a
0041 ; [.0101.0020.0008.0041] # LATIN A
4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03.
ENTRIES
  overrideCJK => sub {
    my $u = 0xFFFF - $_[0]; # reversed
    [$u, 0x20, 0x2, $u];
  },
);

# U+4E03 (overridden) and U+4E00 (explicit entry) share the primary
# weight B1FC; the secondary weights 0020 vs 0030 decide, whatever the
# case of the preceding letter.
ok($overCJK->lt("a", "A")); # diff. at level 3.
ok($overCJK->lt( "\x{4E03}",  "\x{4E00}")); # diff. at level 2.
ok($overCJK->lt("A\x{4E03}", "A\x{4E00}"));
ok($overCJK->lt("A\x{4E03}", "a\x{4E00}"));
ok($overCJK->lt("a\x{4E03}", "A\x{4E00}"));

# Default UCA_Version: pairs up to U+9FBB are expected in reversed order
# (gt); pairs involving U+9FBC or U+9FBF are expected in forward order (lt).
ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}"));
ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}"));
ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));

# UCA_Version 9: the reversed order is expected only up to U+9FA5.
$overCJK->change(UCA_Version => 9);

ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
ok($overCJK->lt("a\x{9FA5}", "A\x{9FA6}"));
ok($overCJK->lt("a\x{9FA6}", "A\x{9FBB}"));
ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));

# UCA_Version 14: same expectations as the default-version run.
$overCJK->change(UCA_Version => 14);

ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}"));
ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}"));
ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));
