# xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/lib/Unicode/UCD.t (revision 0:68f95e015346)
# Test bootstrap: skip on EBCDIC, then move into t/ (if present) and point
# @INC at the sibling lib directory.
BEGIN {
    # ord("A") is 193 on EBCDIC platforms; the whole test is skipped there.
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }

    chdir 't' if -d 't';

    # MacOS gets "::lib" because the module parses @INC itself.
    @INC = $^O eq 'MacOS' ? "::lib" : '../lib';
}

use strict;
use Unicode::UCD qw(charinfo);
use Test::More tests => 179;    # planned number of tests for this file

# Receives the result of each charinfo() call made below.
my $charinfo;

# 0x41: check every charinfo() field for LATIN CAPITAL LETTER A.
$charinfo = charinfo(0x41);

{
    # Each row is [ field, expected value, optional test description ].
    my @expected = (
        [ code          => '0041', 'LATIN CAPITAL LETTER A' ],
        [ name          => 'LATIN CAPITAL LETTER A' ],
        [ category      => 'Lu' ],
        [ combining     => '0' ],
        [ bidi          => 'L' ],
        [ decomposition => '' ],
        [ decimal       => '' ],
        [ digit         => '' ],
        [ numeric       => '' ],
        [ mirrored      => 'N' ],
        [ unicode10     => '' ],
        [ comment       => '' ],
        [ upper         => '' ],
        [ lower         => '0061' ],
        [ title         => '' ],
        [ block         => 'Basic Latin' ],
        [ script        => 'Latin' ],
    );

    for my $row (@expected) {
        my ($field, $want, $label) = @$row;
        is($charinfo->{$field}, $want, $label);
    }
}

# 0x100: a precomposed letter, so decomposition and unicode10 are non-empty.
$charinfo = charinfo(0x100);

{
    # Each row is [ field, expected value, optional test description ].
    my @expected = (
        [ code          => '0100', 'LATIN CAPITAL LETTER A WITH MACRON' ],
        [ name          => 'LATIN CAPITAL LETTER A WITH MACRON' ],
        [ category      => 'Lu' ],
        [ combining     => '0' ],
        [ bidi          => 'L' ],
        [ decomposition => '0041 0304' ],
        [ decimal       => '' ],
        [ digit         => '' ],
        [ numeric       => '' ],
        [ mirrored      => 'N' ],
        [ unicode10     => 'LATIN CAPITAL LETTER A MACRON' ],
        [ comment       => '' ],
        [ upper         => '' ],
        [ lower         => '0101' ],
        [ title         => '' ],
        [ block         => 'Latin Extended-A' ],
        [ script        => 'Latin' ],
    );

    for my $row (@expected) {
        my ($field, $want, $label) = @$row;
        is($charinfo->{$field}, $want, $label);
    }
}

# 0x0590 is in the Hebrew block but unused, so every charinfo() field is
# expected to be undef.
$charinfo = charinfo(0x590);

{
    # Only the first check carries a description; the rest are unnamed.
    my $label = '0x0590 - unused Hebrew';

    for my $field (qw(code name category combining bidi decomposition
                      decimal digit numeric mirrored unicode10 comment
                      upper lower title block script)) {
        is($charinfo->{$field}, undef, $label);
        undef $label;
    }
}

# 0x05d0 is in the Hebrew block and used.
$charinfo = charinfo(0x5d0);

{
    # Each row is [ field, expected value, optional test description ].
    my @expected = (
        [ code          => '05D0', '05D0 - used Hebrew' ],
        [ name          => 'HEBREW LETTER ALEF' ],
        [ category      => 'Lo' ],
        [ combining     => '0' ],
        [ bidi          => 'R' ],
        [ decomposition => '' ],
        [ decimal       => '' ],
        [ digit         => '' ],
        [ numeric       => '' ],
        [ mirrored      => 'N' ],
        [ unicode10     => '' ],
        [ comment       => '' ],
        [ upper         => '' ],
        [ lower         => '' ],
        [ title         => '' ],
        [ block         => 'Hebrew' ],
        [ script        => 'Hebrew' ],
    );

    for my $row (@expected) {
        my ($field, $want, $label) = @$row;
        is($charinfo->{$field}, $want, $label);
    }
}

# An open syllable in Hangul.  Note that decomposition is expected to be
# undef here, not the empty string.
$charinfo = charinfo(0xAC00);

{
    # Each row is [ field, expected value, optional test description ].
    my @expected = (
        [ code          => 'AC00', 'HANGUL SYLLABLE-AC00' ],
        [ name          => 'HANGUL SYLLABLE-AC00' ],
        [ category      => 'Lo' ],
        [ combining     => '0' ],
        [ bidi          => 'L' ],
        [ decomposition => undef ],
        [ decimal       => '' ],
        [ digit         => '' ],
        [ numeric       => '' ],
        [ mirrored      => 'N' ],
        [ unicode10     => '' ],
        [ comment       => '' ],
        [ upper         => '' ],
        [ lower         => '' ],
        [ title         => '' ],
        [ block         => 'Hangul Syllables' ],
        [ script        => 'Hangul' ],
    );

    for my $row (@expected) {
        my ($field, $want, $label) = @$row;
        is($charinfo->{$field}, $want, $label);
    }
}

# A closed syllable in Hangul.  As with 0xAC00, decomposition is expected
# to be undef rather than the empty string.
$charinfo = charinfo(0xAE00);

{
    # Each row is [ field, expected value, optional test description ].
    my @expected = (
        [ code          => 'AE00', 'HANGUL SYLLABLE-AE00' ],
        [ name          => 'HANGUL SYLLABLE-AE00' ],
        [ category      => 'Lo' ],
        [ combining     => '0' ],
        [ bidi          => 'L' ],
        [ decomposition => undef ],
        [ decimal       => '' ],
        [ digit         => '' ],
        [ numeric       => '' ],
        [ mirrored      => 'N' ],
        [ unicode10     => '' ],
        [ comment       => '' ],
        [ upper         => '' ],
        [ lower         => '' ],
        [ title         => '' ],
        [ block         => 'Hangul Syllables' ],
        [ script        => 'Hangul' ],
    );

    for my $row (@expected) {
        my ($field, $want, $label) = @$row;
        is($charinfo->{$field}, $want, $label);
    }
}

# 0x1D400: a code point above 0xFFFF, reported with a five-digit code.
$charinfo = charinfo(0x1D400);

{
    # Each row is [ field, expected value, optional test description ].
    my @expected = (
        [ code          => '1D400', 'MATHEMATICAL BOLD CAPITAL A' ],
        [ name          => 'MATHEMATICAL BOLD CAPITAL A' ],
        [ category      => 'Lu' ],
        [ combining     => '0' ],
        [ bidi          => 'L' ],
        [ decomposition => '<font> 0041' ],
        [ decimal       => '' ],
        [ digit         => '' ],
        [ numeric       => '' ],
        [ mirrored      => 'N' ],
        [ unicode10     => '' ],
        [ comment       => '' ],
        [ upper         => '' ],
        [ lower         => '' ],
        [ title         => '' ],
        [ block         => 'Mathematical Alphanumeric Symbols' ],
        [ script        => 'Common' ],
    );

    for my $row (@expected) {
        my ($field, $want, $label) = @$row;
        is($charinfo->{$field}, $want, $label);
    }
}

use Unicode::UCD qw(charblock charscript);

# 0x0590 is in the Hebrew block but unused: it is expected to have a block
# but no script.
{
    my $block  = charblock(0x590);
    my $script = charscript(0x590);

    is($block,  'Hebrew', '0x0590 - Hebrew unused charblock');
    is($script, undef,    '0x0590 - Hebrew unused charscript');
}

# 0xbe: a fraction, so numeric, decomposition and unicode10 are non-empty.
$charinfo = charinfo(0xbe);

{
    # Each row is [ field, expected value, optional test description ].
    my @expected = (
        [ code          => '00BE', 'VULGAR FRACTION THREE QUARTERS' ],
        [ name          => 'VULGAR FRACTION THREE QUARTERS' ],
        [ category      => 'No' ],
        [ combining     => '0' ],
        [ bidi          => 'ON' ],
        [ decomposition => '<fraction> 0033 2044 0034' ],
        [ decimal       => '' ],
        [ digit         => '' ],
        [ numeric       => '3/4' ],
        [ mirrored      => 'N' ],
        [ unicode10     => 'FRACTION THREE QUARTERS' ],
        [ comment       => '' ],
        [ upper         => '' ],
        [ lower         => '' ],
        [ title         => '' ],
        [ block         => 'Latin-1 Supplement' ],
        [ script        => 'Common' ],
    );

    for my $row (@expected) {
        my ($field, $want, $label) = @$row;
        is($charinfo->{$field}, $want, $label);
    }
}

use Unicode::UCD qw(charblocks charscripts);

# charblocks(): a known block is present with the expected first range
# start, and a made-up block name is absent.
my $charblocks = charblocks();

ok( exists $charblocks->{Thai},     'Thai charblock exists');
is($charblocks->{Thai}[0][0], 0x0e00);
ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist');

# charscripts(): the same three checks, keyed by script name.
my $charscripts = charscripts();

ok( exists $charscripts->{Armenian}, 'Armenian charscript exists');
is($charscripts->{Armenian}[0][0], 0x0531);
ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');

my $charscript;

# The same code point given as bare hex, 0x-prefixed and U+-prefixed must
# resolve to the same script.  Only the first check is named.
{
    my $label = 'Ethiopic charscript';

    for my $spelling ("12ab", "0x12ab", "U+12ab") {
        $charscript = charscript($spelling);
        is($charscript, 'Ethiopic', $label);
        undef $label;
    }
}

my $ranges;

# Called with a script name, charscript() returns that script's ranges;
# check both ends of the second range.
$ranges = charscript('Ogham');
is($ranges->[1][0], 0x1681, 'Ogham charscript');
is($ranges->[1][1], 0x169a);

use Unicode::UCD qw(charinrange);

# Probe just outside and just inside both ends of the Cherokee ranges.
$ranges = charscript('Cherokee');

{
    # Each row is [ code, expected membership, optional test description ].
    my @probes = (
        [ "139f", 0, 'Cherokee charscript' ],
        [ "13a0", 1 ],
        [ "13f4", 1 ],
        [ "13f5", 0 ],
    );

    for my $probe (@probes) {
        my ($code, $inside, $label) = @$probe;
        my $found = charinrange($ranges, $code);
        ok($inside ? $found : !$found, $label);
    }
}

is(Unicode::UCD::UnicodeVersion(), '4.0.1', 'UnicodeVersion');

use Unicode::UCD qw(compexcl);

# compexcl() is expected to be false for 0x0100 and true for 0x0958.
{
    my $plain    = compexcl(0x0100);
    my $excluded = compexcl(0x0958);

    ok(!$plain, 'compexcl');
    ok($excluded);
}

use Unicode::UCD qw(casefold);

my $casefold;

# 0x41: expected status 'C', folding to 0061.
$casefold = casefold(0x41);

{
    my $matches = $casefold->{code}    eq '0041'
               && $casefold->{status}  eq 'C'
               && $casefold->{mapping} eq '0061';
    ok($matches, 'casefold 0x41');
}

# 0xdf: expected status 'F', folding to the two code points 0073 0073.
$casefold = casefold(0xdf);

{
    my $matches = $casefold->{code}    eq '00DF'
               && $casefold->{status}  eq 'F'
               && $casefold->{mapping} eq '0073 0073';
    ok($matches, 'casefold 0xDF');
}

# 0x20 is expected to yield a false result from casefold().
ok(!casefold(0x20));

use Unicode::UCD qw(casespec);

my $casespec;

# 0x41 is expected to yield a false result from casespec().
ok(!casespec(0x41));

# 0xdf: an unconditional entry.  The condition is checked with defined()
# because "eq undef" only compares against the empty string and so cannot
# distinguish a missing condition from an empty one.
$casespec = casespec(0xdf);

ok($casespec->{code}  eq '00DF'      &&
   $casespec->{lower} eq '00DF'      &&
   $casespec->{title} eq '0053 0073' &&
   $casespec->{upper} eq '0053 0053' &&
   !defined($casespec->{condition}), 'casespec 0xDF');

# 0x307: the entry is expected under the {az} key, with condition
# 'az After_I' and an empty lower-case mapping.
$casespec = casespec(0x307);

ok($casespec->{az}{code}      eq '0307' &&
   $casespec->{az}{lower}     eq ''     &&
   $casespec->{az}{title}     eq '0307' &&
   $casespec->{az}{upper}     eq '0307' &&
   $casespec->{az}{condition} eq 'az After_I',
   'casespec 0x307');

# perl #7305 UnicodeCD::compexcl is weird
#
# compexcl() is called while $_ is aliased first to a literal constant and
# then to the elements of a flattened anonymous hash.  The ok(1) calls are
# unconditional: they only record that execution got past each call.

for (1) { $a = compexcl($_) }
ok(1, 'compexcl read-only $_: perl #7305');

grep { compexcl($_) } %{{1=>2}};
ok(1, 'compexcl read-only hash: perl #7305');

# Unicode::UCD::_getcode(): accepted and rejected spellings of a code point.
# The test description is built from the input so that it always names the
# string actually being tested.
{
    # Each row is [ input string, expected return value ].
    my @cases = (
        [ '123',     123     ],    # leading non-zero digit: taken as decimal
        [ '0123',    0x123   ],    # leading zero: taken as hex
        [ '0x123',   0x123   ],
        [ '0X123',   0x123   ],
        [ 'U+123',   0x123   ],
        [ 'u+123',   0x123   ],
        [ 'U+1234',  0x1234  ],
        [ 'U+12345', 0x12345 ],
        [ '123x',    undef   ],    # stray non-hex characters are rejected
        [ 'x123',    undef   ],
        [ '0x123x',  undef   ],
        [ 'U+123x',  undef   ],
    );

    for my $case (@cases) {
        my ($input, $want) = @$case;
        is(Unicode::UCD::_getcode($input), $want, "_getcode($input)");
    }
}

# Emptying the list returned by charscript() must not affect what a later
# call for the same script returns.
{
    my $first = charscript('Latin');
    my $count = @$first;
    is($count, 26, "26 ranges in Latin script (Unicode 4.0.0)");

    @$first = ();    # wipe the caller's copy completely

    my $second      = charscript('Latin');
    my $fresh_count = @$second;
    is($fresh_count, $count, "modifying results should not mess up internal caches");
}

# charinfo() on 0xdeadbeef is expected to return undef.
{
    my $info = charinfo(0xdeadbeef);
    is($info, undef, "[perl #23273] warnings in Unicode::UCD");
}