Lines Matching full:is
33 is(charinfo(0x110000), undef, "Verify charinfo() of non-unicode is undef");
35 …is(lc charprop(0x110000, 'age'), lc "Unassigned", "Verify charprop(age) of non-unicode is Unassign…
36 …is(charprop(0x110000, 'in'), "Unassigned", "Verify charprop(in), a bipartite Perl extension, works…
38 is(charprop(0x110000, 'Any'), undef, "Verify charprop of non-bipartite Perl extension returns undef…
41 $charinfo = charinfo($cp); # Null is often problematic, so test it.
43 is($charinfo->{code}, "0000",
45 is($charinfo->{name}, "<control>");
46 is(charprop($cp, "name"), "");
50 is(charprop($cp, "name_alias"), "NULL: control,NUL: abbreviation");
52 is($charinfo->{category}, "Cc");
53 is(charprop($cp, "category"), "Control");
54 is($charinfo->{combining}, "0");
55 is(charprop($cp, "ccc"), "Not_Reordered");
56 is($charinfo->{bidi}, "BN");
57 is(charprop($cp, "bc"), "Boundary_Neutral");
58 is($charinfo->{decomposition}, "");
59 is(charprop($cp, "dm"), "\0");
60 is($charinfo->{decimal}, "");
61 is($charinfo->{digit}, "");
62 is($charinfo->{numeric}, "");
63 is(charprop($cp, "nv"), "NaN");
64 is($charinfo->{mirrored}, "N");
65 is(charprop($cp, "bidim"), "No");
66 is($charinfo->{unicode10}, "NULL");
67 is(charprop($cp, "na1"), "NULL");
68 is($charinfo->{comment}, "");
69 is(charprop($cp, "isc"), "");
70 is($charinfo->{upper}, "");
71 is(charprop($cp, "uc"), "\0");
72 is($charinfo->{lower}, "");
73 is(charprop($cp, "lc"), "\0");
74 is($charinfo->{title}, "");
75 is(charprop($cp, "tc"), "\0");
76 is($charinfo->{block}, "Basic Latin");
77 is(charprop($cp, "block"), "Basic_Latin");
78 is($charinfo->{script}, "Common") if $v_unicode_version gt v3.0.1;
79 is(charprop($cp, "script"), "Common") if $v_unicode_version gt v3.0.1;
86 is($charinfo->{code}, $A_code, "LATIN CAPITAL LETTER A");
87 is($charinfo->{name}, "LATIN CAPITAL LETTER A");
88 is(charprop($cp, 'name'), "LATIN CAPITAL LETTER A");
89 is($charinfo->{category}, "Lu");
90 is(charprop($cp, 'gc'), "Uppercase_Letter");
91 is($charinfo->{combining}, "0");
92 is(charprop($cp, 'ccc'), "Not_Reordered");
93 is($charinfo->{bidi}, "L");
94 is(charprop($cp, 'bc'), "Left_To_Right");
95 is($charinfo->{decomposition}, "");
96 is(charprop($cp, 'dm'), "A");
97 is($charinfo->{decimal}, "");
98 is($charinfo->{digit}, "");
99 is($charinfo->{numeric}, "");
100 is(charprop($cp, 'nv'), "NaN");
101 is($charinfo->{mirrored}, "N");
102 is(charprop($cp, 'bidim'), "No");
103 is($charinfo->{unicode10}, "");
104 is(charprop($cp, 'na1'), "");
105 is($charinfo->{comment}, "");
106 is(charprop($cp, 'isc'), "");
107 is($charinfo->{upper}, "");
108 is(charprop($cp, 'uc'), "A");
109 is($charinfo->{lower}, $a_code);
110 is(charprop($cp, 'lc'), "a");
111 is($charinfo->{title}, "");
112 is(charprop($cp, 'tc'), "A");
113 is($charinfo->{block}, "Basic Latin");
114 is(charprop($cp, 'block'), "Basic_Latin");
115 is($charinfo->{script}, "Latin") if $v_unicode_version gt v3.0.1;
116 is(charprop($cp, 'script'), "Latin") if $v_unicode_version gt v3.0.1;
121 is($charinfo->{code}, "0100", "LATIN CAPITAL LETTER A WITH MACRON");
122 is($charinfo->{name}, "LATIN CAPITAL LETTER A WITH MACRON");
123 is(charprop($cp, 'name'), "LATIN CAPITAL LETTER A WITH MACRON");
124 is($charinfo->{category}, "Lu");
125 is(charprop($cp, 'gc'), "Uppercase_Letter");
126 is($charinfo->{combining}, "0");
127 is(charprop($cp, 'ccc'), "Not_Reordered");
128 is($charinfo->{bidi}, "L");
129 is(charprop($cp, 'bc'), "Left_To_Right");
130 is($charinfo->{decomposition}, "$A_code 0304");
131 is(charprop($cp, 'dm'), "A\x{0304}");
132 is($charinfo->{decimal}, "");
133 is($charinfo->{digit}, "");
134 is($charinfo->{numeric}, "");
135 is(charprop($cp, 'nv'), "NaN");
136 is($charinfo->{mirrored}, "N");
137 is(charprop($cp, 'bidim'), "No");
138 is($charinfo->{unicode10}, "LATIN CAPITAL LETTER A MACRON");
139 is(charprop($cp, 'na1'), "LATIN CAPITAL LETTER A MACRON");
140 is($charinfo->{comment}, "");
141 is(charprop($cp, 'isc'), "");
142 is($charinfo->{upper}, "");
143 is(charprop($cp, 'uc'), "\x{100}");
144 is($charinfo->{lower}, "0101");
145 is(charprop($cp, 'lc'), "\x{101}");
146 is($charinfo->{title}, "");
147 is(charprop($cp, 'tc'), "\x{100}");
148 is($charinfo->{block}, "Latin Extended-A");
149 is(charprop($cp, 'block'), "Latin_Extended_A");
150 is($charinfo->{script}, "Latin") if $v_unicode_version gt v3.0.1;
151 is(charprop($cp, 'script'), "Latin") if $v_unicode_version gt v3.0.1;
153 $cp = 0x590; # 0x0590 is in the Hebrew block but unused.
156 is($charinfo->{code}, undef, "0x0590 - unused Hebrew");
157 is($charinfo->{name}, undef);
158 is(charprop($cp, 'name'), "");
159 is($charinfo->{category}, undef);
160 is(charprop($cp, 'gc'), "Unassigned");
161 is($charinfo->{combining}, undef);
162 is(charprop($cp, 'ccc'), "Not_Reordered");
163 is($charinfo->{bidi}, undef);
165 is(charprop($cp, 'bc'), "Right_To_Left");
167 is($charinfo->{decomposition}, undef);
168 is(charprop($cp, 'dm'), "\x{590}");
169 is($charinfo->{decimal}, undef);
170 is($charinfo->{digit}, undef);
171 is($charinfo->{numeric}, undef);
172 is(charprop($cp, 'nv'), "NaN");
173 is($charinfo->{mirrored}, undef);
174 is(charprop($cp, 'bidim'), "No");
175 is($charinfo->{unicode10}, undef);
176 is(charprop($cp, 'na1'), "");
177 is($charinfo->{comment}, undef);
178 is(charprop($cp, 'isc'), "");
179 is($charinfo->{upper}, undef);
180 is(charprop($cp, 'uc'), "\x{590}");
181 is($charinfo->{lower}, undef);
182 is(charprop($cp, 'lc'), "\x{590}");
183 is($charinfo->{title}, undef);
184 is(charprop($cp, 'tc'), "\x{590}");
185 is($charinfo->{block}, undef);
186 is(charprop($cp, 'block'), "Hebrew");
187 is($charinfo->{script}, undef);
188 is(charprop($cp, 'script'), $unknown_script) if $v_unicode_version gt
191 # 0x05d0 is in the Hebrew block and used.
196 is($charinfo->{code}, "05D0", "05D0 - used Hebrew");
197 is($charinfo->{name}, "HEBREW LETTER ALEF");
198 is(charprop($cp, 'name'), "HEBREW LETTER ALEF");
199 is($charinfo->{category}, "Lo");
200 is(charprop($cp, 'gc'), "Other_Letter");
201 is($charinfo->{combining}, "0");
202 is(charprop($cp, 'ccc'), "Not_Reordered");
203 is($charinfo->{bidi}, "R");
204 is(charprop($cp, 'bc'), "Right_To_Left");
205 is($charinfo->{decomposition}, "");
206 is(charprop($cp, 'dm'), "\x{5d0}");
207 is($charinfo->{decimal}, "");
208 is($charinfo->{digit}, "");
209 is($charinfo->{numeric}, "");
210 is(charprop($cp, 'nv'), "NaN");
211 is($charinfo->{mirrored}, "N");
212 is(charprop($cp, 'bidim'), "No");
213 is($charinfo->{unicode10}, "");
214 is(charprop($cp, 'na1'), "");
215 is($charinfo->{comment}, "");
216 is(charprop($cp, 'isc'), "");
217 is($charinfo->{upper}, "");
218 is(charprop($cp, 'uc'), "\x{5d0}");
219 is($charinfo->{lower}, "");
220 is(charprop($cp, 'lc'), "\x{5d0}");
221 is($charinfo->{title}, "");
222 is(charprop($cp, 'tc'), "\x{5d0}");
223 is($charinfo->{block}, "Hebrew");
224 is(charprop($cp, 'block'), "Hebrew");
225 is($charinfo->{script}, "Hebrew") if $v_unicode_version gt v3.0.1;
226 is(charprop($cp, 'script'), "Hebrew") if $v_unicode_version gt v3.0.1;
233 is($charinfo->{code}, "AC00", "HANGUL SYLLABLE U+AC00");
234 is($charinfo->{name}, "HANGUL SYLLABLE GA");
235 is(charprop($cp, 'name'), "HANGUL SYLLABLE GA");
236 is($charinfo->{category}, "Lo");
237 is(charprop($cp, 'gc'), "Other_Letter");
238 is($charinfo->{combining}, "0");
239 is(charprop($cp, 'ccc'), "Not_Reordered");
240 is($charinfo->{bidi}, "L");
241 is(charprop($cp, 'bc'), "Left_To_Right");
242 is($charinfo->{decomposition}, "1100 1161");
243 is(charprop($cp, 'dm'), "\x{1100}\x{1161}");
244 is($charinfo->{decimal}, "");
245 is($charinfo->{digit}, "");
246 is($charinfo->{numeric}, "");
247 is(charprop($cp, 'nv'), "NaN");
248 is($charinfo->{mirrored}, "N");
249 is(charprop($cp, 'bidim'), "No");
250 is($charinfo->{unicode10}, "");
251 is(charprop($cp, 'na1'), "");
252 is($charinfo->{comment}, "");
253 is(charprop($cp, 'isc'), "");
254 is($charinfo->{upper}, "");
255 is(charprop($cp, 'uc'), "\x{AC00}");
256 is($charinfo->{lower}, "");
257 is(charprop($cp, 'lc'), "\x{AC00}");
258 is($charinfo->{title}, "");
259 is(charprop($cp, 'tc'), "\x{AC00}");
260 is($charinfo->{block}, "Hangul Syllables");
261 is(charprop($cp, 'block'), "Hangul_Syllables");
262 is($charinfo->{script}, "Hangul") if $v_unicode_version gt v3.0.1;
263 is(charprop($cp, 'script'), "Hangul") if $v_unicode_version gt v3.0.1;
270 is($charinfo->{code}, "AE00", "HANGUL SYLLABLE U+AE00");
271 is($charinfo->{name}, "HANGUL SYLLABLE GEUL");
272 is(charprop($cp, 'name'), "HANGUL SYLLABLE GEUL");
273 is($charinfo->{category}, "Lo");
274 is(charprop($cp, 'gc'), "Other_Letter");
275 is($charinfo->{combining}, "0");
276 is(charprop($cp, 'ccc'), "Not_Reordered");
277 is($charinfo->{bidi}, "L");
278 is(charprop($cp, 'bc'), "Left_To_Right");
279 is($charinfo->{decomposition}, "1100 1173 11AF");
280 is(charprop($cp, 'dm'), "\x{1100}\x{1173}\x{11AF}");
281 is($charinfo->{decimal}, "");
282 is($charinfo->{digit}, "");
283 is($charinfo->{numeric}, "");
284 is(charprop($cp, 'nv'), "NaN");
285 is($charinfo->{mirrored}, "N");
286 is(charprop($cp, 'bidim'), "No");
287 is($charinfo->{unicode10}, "");
288 is(charprop($cp, 'na1'), "");
289 is($charinfo->{comment}, "");
290 is(charprop($cp, 'isc'), "");
291 is($charinfo->{upper}, "");
292 is(charprop($cp, 'uc'), "\x{AE00}");
293 is($charinfo->{lower}, "");
294 is(charprop($cp, 'lc'), "\x{AE00}");
295 is($charinfo->{title}, "");
296 is(charprop($cp, 'tc'), "\x{AE00}");
297 is($charinfo->{block}, "Hangul Syllables");
298 is(charprop($cp, 'block'), "Hangul_Syllables");
299 is($charinfo->{script}, "Hangul") if $v_unicode_version gt v3.0.1;
300 is(charprop($cp, 'script'), "Hangul") if $v_unicode_version gt v3.0.1;
306 is($charinfo->{code}, "1D400", "MATHEMATICAL BOLD CAPITAL A");
307 is($charinfo->{name}, "MATHEMATICAL BOLD CAPITAL A");
308 is(charprop($cp, 'name'), "MATHEMATICAL BOLD CAPITAL A");
309 is($charinfo->{category}, "Lu");
310 is(charprop($cp, 'gc'), "Uppercase_Letter");
311 is($charinfo->{combining}, "0");
312 is(charprop($cp, 'ccc'), "Not_Reordered");
313 is($charinfo->{bidi}, "L");
314 is(charprop($cp, 'bc'), "Left_To_Right");
315 is($charinfo->{decomposition}, "<font> $A_code");
316 is(charprop($cp, 'dm'), "A");
317 is($charinfo->{decimal}, "");
318 is($charinfo->{digit}, "");
319 is($charinfo->{numeric}, "");
320 is(charprop($cp, 'nv'), "NaN");
321 is($charinfo->{mirrored}, "N");
322 is(charprop($cp, 'bidim'), "No");
323 is($charinfo->{unicode10}, "");
324 is(charprop($cp, 'na1'), "");
325 is($charinfo->{comment}, "");
326 is(charprop($cp, 'isc'), "");
327 is($charinfo->{upper}, "");
328 is(charprop($cp, 'uc'), "\x{1D400}");
329 is($charinfo->{lower}, "");
330 is(charprop($cp, 'lc'), "\x{1D400}");
331 is($charinfo->{title}, "");
332 is(charprop($cp, 'tc'), "\x{1D400}");
333 is($charinfo->{block}, "Mathematical Alphanumeric Symbols");
334 is(charprop($cp, 'block'), "Mathematical_Alphanumeric_Symbols");
335 is($charinfo->{script}, "Common");
336 is(charprop($cp, 'script'), "Common");
343 is($charinfo->{code}, "9FBA", "U+9FBA");
344 is($charinfo->{name}, "CJK UNIFIED IDEOGRAPH-9FBA");
345 is(charprop($cp, 'name'), "CJK UNIFIED IDEOGRAPH-9FBA");
346 is($charinfo->{category}, "Lo");
347 is(charprop($cp, 'gc'), "Other_Letter");
348 is($charinfo->{combining}, "0");
349 is(charprop($cp, 'ccc'), "Not_Reordered");
350 is($charinfo->{bidi}, "L");
351 is(charprop($cp, 'bc'), "Left_To_Right");
352 is($charinfo->{decomposition}, "");
353 is(charprop($cp, 'dm'), "\x{9FBA}");
354 is($charinfo->{decimal}, "");
355 is($charinfo->{digit}, "");
356 is($charinfo->{numeric}, "");
357 is(charprop($cp, 'nv'), "NaN");
358 is($charinfo->{mirrored}, "N");
359 is(charprop($cp, 'bidim'), "No");
360 is($charinfo->{unicode10}, "");
361 is(charprop($cp, 'na1'), "");
362 is($charinfo->{comment}, "");
363 is(charprop($cp, 'isc'), "");
364 is($charinfo->{upper}, "");
365 is(charprop($cp, 'uc'), "\x{9FBA}");
366 is($charinfo->{lower}, "");
367 is(charprop($cp, 'lc'), "\x{9FBA}");
368 is($charinfo->{title}, "");
369 is(charprop($cp, 'tc'), "\x{9FBA}");
370 is($charinfo->{block}, "CJK Unified Ideographs");
371 is(charprop($cp, 'block'), "CJK_Unified_Ideographs");
372 is($charinfo->{script}, "Han");
373 is(charprop($cp, 'script'), "Han");
378 # 0x0590 is in the Hebrew block but unused.
380 is(charblock(0x590), "Hebrew", "0x0590 - Hebrew unused charblock");
381 is(charscript(0x590), $unknown_script, "0x0590 - Hebrew unused charscript") if $v_unicode_v…
382 is(charblock(0x1FFFF), "No_Block", "0x1FFFF - unused charblock");
387 is(charblock(chr(0x6237)), undef,
397 is($charinfo->{code}, $fraction_3_4_code, "VULGAR FRACTION THREE QUARTERS");
398 is($charinfo->{name}, "VULGAR FRACTION THREE QUARTERS");
399 is(charprop($cp, 'name'), "VULGAR FRACTION THREE QUARTERS");
400 is($charinfo->{category}, "No");
401 is(charprop($cp, 'gc'), "Other_Number");
402 is($charinfo->{combining}, "0");
403 is(charprop($cp, 'ccc'), "Not_Reordered");
404 is($charinfo->{bidi}, "ON");
405 is(charprop($cp, 'bc'), "Other_Neutral");
406 is($charinfo->{decomposition}, "<fraction> "
410 is(charprop($cp, 'dm'), "3\x{2044}4");
411 is($charinfo->{decimal}, "");
412 is($charinfo->{digit}, "");
413 is($charinfo->{numeric}, "3/4");
414 is(charprop($cp, 'nv'), "0.75");
415 is($charinfo->{mirrored}, "N");
416 is(charprop($cp, 'bidim'), "No");
417 is($charinfo->{unicode10}, "FRACTION THREE QUARTERS");
418 is(charprop($cp, 'na1'), "FRACTION THREE QUARTERS");
419 is($charinfo->{comment}, "");
420 is(charprop($cp, 'isc'), "");
421 is($charinfo->{upper}, "");
422 is(charprop($cp, 'uc'), chr hex $cp);
423 is($charinfo->{lower}, "");
424 is(charprop($cp, 'lc'), chr hex $cp);
425 is($charinfo->{title}, "");
426 is(charprop($cp, 'tc'), chr hex $cp);
427 is($charinfo->{block}, "Latin-1 Supplement");
428 is(charprop($cp, 'block'), "Latin_1_Supplement");
429 is($charinfo->{script}, "Common") if $v_unicode_version gt v3.0.1;
430 is(charprop($cp, 'script'), "Common") if $v_unicode_version gt v3.0.1;
432 # This is to test a case where both simple and full lowercases exist and
439 is($charinfo->{code}, "0130", "LATIN CAPITAL LETTER I WITH DOT ABOVE");
440 is($charinfo->{name}, "LATIN CAPITAL LETTER I WITH DOT ABOVE");
441 is(charprop($cp, 'name'), "LATIN CAPITAL LETTER I WITH DOT ABOVE");
442 is($charinfo->{category}, "Lu");
443 is(charprop($cp, 'gc'), "Uppercase_Letter");
444 is($charinfo->{combining}, "0");
445 is(charprop($cp, 'ccc'), "Not_Reordered");
446 is($charinfo->{bidi}, "L");
447 is(charprop($cp, 'bc'), "Left_To_Right");
448 is($charinfo->{decomposition}, "$I_code 0307");
449 is(charprop($cp, 'dm'), "I\x{0307}");
450 is($charinfo->{decimal}, "");
451 is($charinfo->{digit}, "");
452 is($charinfo->{numeric}, "");
453 is(charprop($cp, 'nv'), "NaN");
454 is($charinfo->{mirrored}, "N");
455 is(charprop($cp, 'bidim'), "No");
456 is($charinfo->{unicode10}, "LATIN CAPITAL LETTER I DOT");
457 is(charprop($cp, 'na1'), "LATIN CAPITAL LETTER I DOT");
458 is($charinfo->{comment}, "");
459 is(charprop($cp, 'isc'), "");
460 is($charinfo->{upper}, "");
461 is(charprop($cp, 'uc'), "\x{130}");
462 is($charinfo->{lower}, $i_code);
463 is(charprop($cp, 'lc'), "i\x{307}") if $v_unicode_version ge v3.2.0;
464 is($charinfo->{title}, "");
465 is(charprop($cp, 'tc'), "\x{130}");
466 is($charinfo->{block}, "Latin Extended-A");
467 is(charprop($cp, 'block'), "Latin_Extended_A");
468 is($charinfo->{script}, "Latin") if $v_unicode_version gt v3.0.1;
469 is(charprop($cp, 'script'), "Latin") if $v_unicode_version gt v3.0.1;
471 # This is to test a case where both simple and full uppercases exist and
476 is($charinfo->{code}, "1F80", "GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI");
477 is($charinfo->{name}, "GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI");
478 is(charprop($cp, "name"), "GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI");
479 is($charinfo->{category}, "Ll");
480 is(charprop($cp, "gc"), "Lowercase_Letter");
481 is($charinfo->{combining}, "0");
482 is(charprop($cp, "ccc"), "Not_Reordered");
483 is($charinfo->{bidi}, "L");
484 is(charprop($cp, "bc"), "Left_To_Right");
485 is($charinfo->{decomposition}, "1F00 0345");
486 is(charprop($cp, "dm"), "\x{1F00}\x{0345}");
487 is($charinfo->{decimal}, "");
488 is($charinfo->{digit}, "");
489 is($charinfo->{numeric}, "");
490 is(charprop($cp, "nv"), "NaN");
491 is($charinfo->{mirrored}, "N");
492 is(charprop($cp, "bidim"), "No");
493 is($charinfo->{unicode10}, "");
494 is(charprop($cp, "na1"), "");
495 is($charinfo->{comment}, "");
496 is(charprop($cp, "isc"), "");
497 is($charinfo->{upper}, "1F88");
498 is(charprop($cp, "uc"), "\x{1F08}\x{0399}");
499 is(charprop($cp, "suc"), "\x{1F88}");
500 is($charinfo->{lower}, "");
501 is(charprop($cp, "lc"), "\x{1F80}");
502 is($charinfo->{title}, "1F88");
503 is(charprop($cp, "tc"), "\x{1F88}");
504 is($charinfo->{block}, "Greek Extended");
505 is(charprop($cp, "block"), "Greek_Extended");
506 is($charinfo->{script}, "Greek") if $v_unicode_version gt v3.0.1;
507 is(charprop($cp, "script"), "Greek") if $v_unicode_version gt v3.0.1;
509 is(charprop(ord("A"), "foo"), undef,
514 is(charprop(ord("("), "bpb"), ")",
517 is(charprop(ord("9"), "nv"), 9,
520 is(charprop(utf8::unicode_to_native(0xAD), "NFKC_Casefold"), "",
525 is($mark_props_ref->{'Bidi_Class'}, "Nonspacing_Mark",
527 is($mark_props_ref->{'Bidi_Mirrored'}, "No");
528 is($mark_props_ref->{'Canonical_Combining_Class'}, "Above");
529 is($mark_props_ref->{'Case_Folding'}, "\x{300}");
530 is($mark_props_ref->{'Decomposition_Mapping'}, "\x{300}");
531 is($mark_props_ref->{'Decomposition_Type'}, ($v_unicode_version le v4.0.0)
534 is($mark_props_ref->{'General_Category'}, "Nonspacing_Mark");
536 is($mark_props_ref->{'ISO_Comment'}, "");
538 is($mark_props_ref->{'Lowercase_Mapping'}, "\x{300}");
539 is($mark_props_ref->{'Name'}, "COMBINING GRAVE ACCENT");
540 is($mark_props_ref->{'Numeric_Type'}, "None");
541 is($mark_props_ref->{'Numeric_Value'}, "NaN");
542 is($mark_props_ref->{'Simple_Case_Folding'}, "\x{300}");
543 is($mark_props_ref->{'Simple_Lowercase_Mapping'}, "\x{300}");
544 is($mark_props_ref->{'Simple_Titlecase_Mapping'}, "\x{300}");
545 is($mark_props_ref->{'Simple_Uppercase_Mapping'}, "\x{300}");
546 is($mark_props_ref->{'Titlecase_Mapping'}, "\x{300}");
547 is($mark_props_ref->{'Unicode_1_Name'}, "NON-SPACING GRAVE");
548 is($mark_props_ref->{'Uppercase_Mapping'}, "\x{300}");
555 is($charblocks->{Thai}->[0]->[0], hex('0e00'));
562 is($charscripts->{Armenian}->[0]->[0], hex('0531'));
568 is($charscript, 'Ethiopic', 'Ethiopic charscript');
571 is($charscript, 'Ethiopic');
574 is($charscript, 'Ethiopic');
580 is($ranges->[0]->[0], hex('1680'), 'Ogham charscript');
581 is($ranges->[0]->[1], hex('169C'));
598 is($gc->{L}, 'Letter', 'L is Letter');
599 is($gc->{Lu}, 'UppercaseLetter', 'Lu is UppercaseLetter');
606 is($bt->{L}, 'Left-to-Right', 'L is Left-to-Right');
607 is($bt->{AL}, 'Right-to-Left Arabic', 'AL is Right-to-Left Arabic');
627 is($casefold->{code}, $A_code, 'casefold native(0x41) code');
628 is($casefold->{status}, 'C', 'casefold native(0x41) status');
629 is($casefold->{mapping}, $a_code, 'casefold native(0x41) mapping');
630 is($casefold->{full}, $a_code, 'casefold native(0x41) full');
631 is($casefold->{simple}, $a_code, 'casefold native(0x41) simple');
632 is($casefold->{turkic}, "", 'casefold native(0x41) turkic');
642 is($casefold->{code}, $sharp_s_code, 'casefold native(0xDF) code');
643 is($casefold->{status}, 'F', 'casefold native(0xDF) status');
644 is($casefold->{mapping}, "$s_code $s_code", 'casefold native(0xDF) mapping');
645 is($casefold->{full}, "$s_code $s_code", 'casefold native(0xDF) full');
646 is($casefold->{simple}, "", 'casefold native(0xDF) simple');
647 is($casefold->{turkic}, "", 'casefold native(0xDF) turkic');
656 is($casefold->{code}, '0130', 'casefold 0x130 code');
657 is($casefold->{status}, 'C' , 'casefold 0x130 status');
658 is($casefold->{mapping}, $i_code, 'casefold 0x130 mapping');
659 is($casefold->{full}, $i_code, 'casefold 0x130 full');
660 is($casefold->{simple}, $i_code, 'casefold 0x130 simple');
661 is($casefold->{turkic}, "", 'casefold 0x130 turkic');
665 is($casefold->{code}, '0131', 'casefold 0x131 code');
666 is($casefold->{status}, 'C' , 'casefold 0x131 status');
667 is($casefold->{mapping}, $i_code, 'casefold 0x131 mapping');
668 is($casefold->{full}, $i_code, 'casefold 0x131 full');
669 is($casefold->{simple}, $i_code, 'casefold 0x131 simple');
670 is($casefold->{turkic}, "", 'casefold 0x131 turkic');
675 is($casefold->{code}, '0130', 'casefold 0x130 code');
676 is($casefold->{status}, 'I' , 'casefold 0x130 status');
677 is($casefold->{mapping}, $i_code, 'casefold 0x130 mapping');
678 is($casefold->{full}, $i_code, 'casefold 0x130 full');
679 is($casefold->{simple}, $i_code, 'casefold 0x130 simple');
680 is($casefold->{turkic}, $i_code, 'casefold 0x130 turkic');
684 is($casefold->{code}, '0131', 'casefold 0x131 code');
685 is($casefold->{status}, 'I' , 'casefold 0x131 status');
686 is($casefold->{mapping}, $i_code, 'casefold 0x131 mapping');
687 is($casefold->{full}, $i_code, 'casefold 0x131 full');
688 is($casefold->{simple}, $i_code, 'casefold 0x131 simple');
689 is($casefold->{turkic}, $i_code, 'casefold 0x131 turkic');
693 is($casefold->{code}, $I_code, 'casefold native(0x49) code');
694 is($casefold->{status}, 'C' , 'casefold native(0x49) status');
695 is($casefold->{mapping}, $i_code, 'casefold native(0x49) mapping');
696 is($casefold->{full}, $i_code, 'casefold native(0x49) full');
697 is($casefold->{simple}, $i_code, 'casefold native(0x49) simple');
698 is($casefold->{turkic}, "0131", 'casefold native(0x49) turkic');
702 is($casefold->{code}, '0130', 'casefold 0x130 code');
703 is($casefold->{status}, 'F' , 'casefold 0x130 status');
704 is($casefold->{mapping}, "$i_code 0307", 'casefold 0x130 mapping');
705 is($casefold->{full}, "$i_code 0307", 'casefold 0x130 full');
706 is($casefold->{simple}, "", 'casefold 0x130 simple');
707 is($casefold->{turkic}, $i_code, 'casefold 0x130 turkic');
713 is($casefold->{code}, '1F88', 'casefold 0x1F88 code');
714 is($casefold->{status}, 'S' , 'casefold 0x1F88 status');
715 is($casefold->{mapping}, '1F80', 'casefold 0x1F88 mapping');
716 is($casefold->{full}, '1F00 03B9', 'casefold 0x1F88 full');
717 is($casefold->{simple}, '1F80', 'casefold 0x1F88 simple');
718 is($casefold->{turkic}, "", 'casefold 0x1F88 turkic');
751 # perl #7305 UnicodeCD::compexcl is weird
758 is(Unicode::UCD::_getcode('123'), 123, "_getcode(123)");
759 is(Unicode::UCD::_getcode('0123'), 0x123, "_getcode(0123)");
760 is(Unicode::UCD::_getcode('0x123'), 0x123, "_getcode(0x123)");
761 is(Unicode::UCD::_getcode('0X123'), 0x123, "_getcode(0X123)");
762 is(Unicode::UCD::_getcode('U+123'), 0x123, "_getcode(U+123)");
763 is(Unicode::UCD::_getcode('u+123'), 0x123, "_getcode(u+123)");
764 is(Unicode::UCD::_getcode('U+1234'), 0x1234, "_getcode(U+1234)");
765 is(Unicode::UCD::_getcode('U+12345'), 0x12345, "_getcode(U+12345)");
766 is(Unicode::UCD::_getcode('123x'), undef, "_getcode(123x)");
767 is(Unicode::UCD::_getcode('x123'), undef, "_getcode(x123)");
768 is(Unicode::UCD::_getcode('0x123x'), undef, "_getcode(x123)");
769 is(Unicode::UCD::_getcode('U+123x'), undef, "_getcode(x123)");
778 is(charscript(chr(0x6237)), undef,
789 is($n1, 39, "number of ranges in Latin script (Unicode $expected_version)") if $::IS_ASCII;
792 is(@$r2, $n1, "modifying results should not mess up internal caches");
797 is(charinfo(0xdeadbeef), undef, "[perl #23273] warnings in Unicode::UCD");
803 is(namedseq("KEYCAP DIGIT ZERO"), "0\x{FE0F}\x{20E3}",
805 is(namedseq("KATAKANA LETTER AINU P"), "\x{31F7}\x{309A}", "namedseq");
806 is(namedseq("KATAKANA LETTER AINU Q"), undef);
807 is(namedseq(), undef);
808 is(namedseq(qw(foo bar)), undef);
810 is(scalar @ns, 2);
811 is($ns[0], 0x31F7);
812 is($ns[1], 0x309A);
814 is($ns{"KATAKANA LETTER AINU P"}, "\x{31F7}\x{309A}");
816 is(@ns, 0);
822 # is run on an older version.
825 is(num("0"), 0, 'Verify num("0") == 0');
826 is(num("0", \$ret_len), 0, 'Verify num("0", \$ret_len) == 0');
827 is($ret_len, 1, "... and the returned length is 1");
829 is($ret_len, 0, "... and the returned length is 0");
831 is($ret_len, 0, "... and the returned length is 0");
832 is(num("98765", \$ret_len), 98765, 'Verify num("98765") == 98765');
833 is($ret_len, 5, "... and the returned length is 5");
836 is($ret_len, 5, "... but the returned length is 5");
841 is(num($tai_lue_2), 2, 'Verify num("\N{NEW TAI LUE DIGIT TWO}") == 2');
842 is(num($tai_lue_1), 1, 'Verify num("\N{NEW TAI LUE DIGIT ONE}") == 1');
843 is(num($tai_lue_2 . $tai_lue_1), 21,
850 is($ret_len, 1, "... but the returned length is 1");
854 is($ret_len, 1, "... but the returned length is 1");
858 is(num($cham_0 . charnames::string_vianame("CHAM DIGIT THREE")), 3,
865 is($ret_len, 1, "... but the returned length is 1");
868 is(num("\N{SUPERSCRIPT TWO}"), 2, 'Verify num("\N{SUPERSCRIPT TWO} == 2');
870 is(num(charnames::string_vianame("ETHIOPIC NUMBER TEN THOUSAND")), 10000,
874 is(num(charnames::string_vianame("NORTH INDIC FRACTION ONE HALF")),
877 is(num("\N{U+12448}"), 9, 'Verify num("\N{U+12448}") == 9');
879 if ($v_unicode_version gt v3.2.0) { # Is missing from non-Unihan files before
881 is(num("\N{U+5146}"), 1000000000000,
893 is(prop_aliases(undef), undef, "prop_aliases(undef) returns <undef>");
894 is(prop_aliases("unknown property"), undef,
896 is(prop_aliases("InKana"), undef,
898 is(prop_aliases("Perl_Decomposition_Mapping"), undef, "prop_aliases('Perl_Decomposition_Mapping') r…
899 is(prop_aliases("Perl_Charnames"), undef,
901 is(prop_aliases("isgc"), undef,
902 "prop_aliases('isgc') returns <undef> since is not covered Perl extension");
903 is(prop_aliases("Is_Is_Any"), undef,
904 "prop_aliases('Is_Is_Any') returns <undef> since two is's");
905 is(prop_aliases("ccc=vr"), undef,
925 skip "PropertyAliases.txt is not in this Unicode version", 1 if $v_unicode_version lt v3.2.0;
935 my $count = 0; # 0th field in line is short name; 1th is long name
952 … is(@all_names, 0, "prop_aliases('$mod_name') returns undef since $alias is not installed");
957 diag("'$alias' is unknown to prop_aliases()");
961 if ($count == 0) { # Is short name
966 last unless is($names_via_short[0], $alias,
967 "prop_aliases: '$alias' is the short name for '$mod_name'");
970 elsif ($count == 1) { # Is full name
980 is(prop_aliases($short_name), $alias,
981 "prop_aliases: '$alias' is the long name for '$short_name'");
983 else { # Is another alias
986 "prop_aliases: '$alias' is listed as an alias for '$mod_name'");
1008 pass("prop_aliases: '$lc_name' is listed as an alias for '$mod_name'");
1019 my $stripped = $lc_name =~ s/^is//;
1021 # Could be that the input includes a prefix 'is', which is rarely
1028 # If that didn't work, it could be that it's a block, which is always
1034 # Could check that is a real block, but tests for invmap will
1038 my $message = "prop_aliases: '$lc_name' is listed as an alias for '$mod_name'";
1054 if ($alias =~ /=/) { # Only test one case where there is an equals
1061 is(@list, 0, "prop_aliases: '$lc_name' returns an empty list since it is internal_only");
1064 … is(@list, 0, "prop_aliases: '$lc_name' returns an empty list since is illegal property name");
1068 "prop_aliases: '$lc_name' is listed as an alias for '$alias'");
1074 is(prop_value_aliases("unknown property", "unknown value"), undef,
1076 is(prop_value_aliases(undef, undef), undef,
1078 is((prop_value_aliases("na", "A")), "A", "test that prop_value_aliases returns its input for proper…
1079 is(prop_value_aliases("isgc", "C"), undef, "prop_value_aliases('isgc', 'C') returns <undef> since i…
1080 is(prop_value_aliases("gc", "isC"), undef, "prop_value_aliases('gc', 'isC') returns <undef> since i…
1081 is(prop_value_aliases("Any", "None"), undef, "prop_value_aliases('Any', 'None') returns <undef> sin…
1082 is(prop_value_aliases("lc", "A"), "A", "prop_value_aliases('lc', 'A') returns its input, as docs sa…
1092 skip "PropValueAliases.txt is not in this Unicode version", 1 if $v_unicode_version lt v3.2.0;
1097 # Each examined line in the file is for a single value for a property. We
1112 my $prop = shift @fields; # 0th field is the property,
1114 # 'qc' is short in early versions of the file for any of the quick check
1145 my $count = 0; # 0th field in line (after shifting off the property) is
1146 # short name; 1th is long name
1150 # The property on the lhs of the = is always loosely matched. Add in
1169 # Also, there is a bug in the file in which "n/a" is omitted, and
1171 # is all lower case. Copy the "short" name unto the full one to
1177 # The ccc property in the file is special; has an extra numeric field
1188 …is(prop_value_aliases($prop, $value), undef, "prop_value_aliases('$prop', '$value') returns undef …
1192 …is(prop_value_aliases($prop, $value), undef, "prop_value_aliases('$prop', '$value') returns undef …
1199 # If the value is a number, optionally negative, including a floating
1203 …is(prop_value_aliases($mod_prop, $mod_value), undef, "prop_value_aliases('$mod_prop', '$mod_value'…
1214 …last unless is($names_via_short[0], $value, "prop_value_aliases: In '$prop', '$value' is the short…
1228 …is(prop_value_aliases($prop, $short_name), $value, "'$value' is the long name for prop_value_alias…
1233 …loose_name(lc $value) } prop_value_aliases($prop, $short_name)), "'$value' is listed as an alias f…
1256 else { # Here value is strictly matched.
1264 # Verify that loose matching fails when only strict is called for.
1265 next unless is(prop_value_aliases($prop, "$extra_chars$value"), undef,
1274 # The lhs property is always loosely matched, so add in extra
1289 "'$value' is listed as an alias for prop_value_aliases('$mod_prop', '$mod_value')");
1321 is($format, 'al', "prop_invmap() format of '$prop' is 'al'");
1322 is($missing, '0', "prop_invmap() missing of '$prop' is '0'");
1323 is($invlist_ref->[1], 0x61, "prop_invmap('$prop') list[1] is 0x61");
1324 is($invmap_ref->[1], 0x41, "prop_invmap('$prop') map[1] is 0x41");
1328 is($format, 's', "prop_invmap() format of '$prop' is 's");
1329 is($missing, 'N', "prop_invmap() missing of '$prop' is 'N'");
1330 is($invlist_ref->[1], 0x41, "prop_invmap('$prop') list[1] is 0x41");
1331 is($invmap_ref->[1], 'Y', "prop_invmap('$prop') map[1] is 'Y'");
1335 is($format, 's', "prop_invmap() format of '$prop' is 's'");
1336 is($missing, 'N', "prop_invmap() missing of '$prop' is 'N'");
1337 is($invlist_ref->[1], 0x61, "prop_invmap('$prop') list[1] is 0x61");
1338 is($invmap_ref->[1], 'Y', "prop_invmap('$prop') map[1] is 'Y'");
1343 is($format, $lc_format, "prop_invmap() format of '$prop' is '$lc_format");
1344 is($missing, '0', "prop_invmap() missing of '$prop' is '0'");
1345 is($invlist_ref->[1], 0x41, "prop_invmap('$prop') list[1] is 0x41");
1346 is($invmap_ref->[1], 0x61, "prop_invmap('$prop') map[1] is 0x61");
1349 # This property is stable and small, so can test all of it
1353 is($format, 's', "prop_invmap() format of '$prop' is 's'");
1354 is($missing, 'N', "prop_invmap() missing of '$prop' is 'N'");
1360 "prop_invmap('$prop') code point list is correct");
1370 "prop_invmap('$prop') code point list is correct");
1373 "prop_invmap('$prop') map list is correct");
1376 is(prop_invlist("Unknown property"), undef, "prop_invlist(<Unknown property>) returns undef");
1377 is(prop_invlist(undef), undef, "prop_invlist(undef) returns undef");
1378 is(prop_invlist("Any"), 2, "prop_invlist('Any') returns the number of elements in scalar context");
1381 is(prop_invlist("Is_Is_Any"), undef, "prop_invlist('Is_Is_Any') returns <undef> since two is's");
1385 is(prop_invlist("InKana"), undef, "prop_invlist(<user-defined property returns undef>)");
1387 # The way both the tests for invlist and invmap work is that they take the
1394 # ASCII_Hex_Digit again, as it is stable.
1400 "prop_invlist('AHex') is exactly the expected set of points");
1404 "prop_invlist('AHex=f') is exactly the expected set of points");
1414 "prop_invlist('AHex') is exactly the expected set of points");
1425 "prop_invlist('AHex=f') is exactly the expected set of points");
1435 is($constructed, $official, "$tested_function_name('$prop')");
1477 # If this is to be loose matched, add in characters to test that.
1483 # Verify that loose matching fails when only strict is called
1485 …next unless is(prop_invlist("$prop_only=$extra_chars$value"), undef, "prop_invlist('$prop_only=$ex…
1511 … is(@tested, 0, "prop_invlist('$mod_table') returns an empty list since is internal-only");
1525 # A '!' in the file name means that it is to be inverted.
1529 # If the file's directory is '#', it is a special case where the
1531 # it being an actual file to read. The file is an index in to the
1546 # If we are to test against an inverted file, it is easier to invert
1557 # The file is inversion list format code points, like this:
1563 # The V indicates it's an inversion list, and is followed immediately
1585 is (@list, 0, "prop_invmap(<Unknown property>) returns an empty list");
1587 is (@list, 0, "prop_invmap(undef) returns an empty list");
1591 is (@list, 0, "prop_invmap(<internal property>) returns an empty list");
1593 is(@list, 0, "prop_invmap(<user-defined property returns undef>)");
1595 is(@list, 0, "prop_invmap('Perl_Decomposition_Mapping') returns <undef> since internal-Perl-only");
1597 is(@list, 0, "prop_invmap('Perl_Charnames') returns <undef> since internal-Perl-only");
1599 is(@list, 0, "prop_invmap('Is_Is_Any') returns <undef> since two is's");
1602 # applications use them (though such use is deprecated).
1613 # Like prop_invlist(), prop_invmap() is tested by comparing the results
1633 my $display_prop; # The property name that is displayed, as opposed
1634 # to the one that is actually used.
1644 diag("is unknown to prop_aliases(), and we need it in order to test prop_invmap");
1649 # Normalize the short name, as it is stored in the hashes under the
1654 # table are generated. For all the tests except prop_invmap(), this is
1655 # irrelevant, but for prop_invmap, having an 'is' prefix forces it to
1657 # between the two forms. The property name is what has this information.
1677 # If prop_invmap() returned nothing, is ok iff is a property whose file is
1688 diag("'$prop' is unknown to prop_invmap()");
1707 diag("The last inversion list element is not 0x110000");
1740 diag("The last inversion list element is '$invmap_ref->[-1]', and should be '$missing'");
1802 # Name property which is structurally enough different that it is handled
1811 # In the above, blk is done unconditionally, as we need to test that
1815 # file (which is valid, unlike blk) instead of the combo
1821 # The blk property is special. The original file with old block
1822 # names is retained, and the default (on ASCII platforms) is to
1823 # not write out a new-name file. What we do is get the old names
1825 # would look like. $base_file is needed to be defined, just to
1827 $base_file = "This is a dummy name";
1833 # flattened, so have one level. ($index is used as a dummy
1849 # would look like. (The sub range is for EBCDIC platforms
1868 # below. This should happen only when it is a binary property
1870 # and so it is stored in a different directory than the To ones.
1871 # XXX Currently, the only cases where it is complemented are the
1883 # Read in the file. If the file's directory is '#', it is a
1908 # If this is a multi-char range, we turn it into as many
1975 # need to be removed as the list is for just the full ones.
1992 # Here $i is such that it points to the first range which ends
1993 # at or above cp, and hence is the only range that could
2000 # Otherwise, remove the existing entry. If it is the first
2010 # ... but if it is the only element in the range, remove
2016 else { # Is somewhere in the middle of the range
2053 # For these properties the file is output using hex notation for the
2076 # error or is relevant). As we go along, we delete from that copy.
2077 # If a delete fails, or something is left over after we are done,
2081 # The extra -$upper_limit_subtract is because the final element may
2086 # If the map element is a reference, have to stringify it (but
2098 # Script_Extension, is space separated. Assume the latter
2129 # the specials hash. The key is the utf8 bytes of the
2130 # code point, and the value is its map as a utf-8 string.
2155 … diag(sprintf "Range beginning at %04X is out-of-order.", $invlist_ref->[$i]);
2172 } # Otherwise, the map is to a simple scalar
2184 # not stored as adjusted. (There currently is only one ale
2198 # until the whole range is processed).
2231 … diag(sprintf "Range beginning at %04X is out-of-order.", $invlist_ref->[$i]);
2242 # include it in the built-up string. But make sure that it is in
2249 diag(sprintf "Range beginning at %04X is out-of-order.", $invlist_ref->[$i]);
2256 # Ignore it, but make sure it is in order.
2265 diag(sprintf "Range beginning at %04X is out-of-order.", $invlist_ref->[$i]);
2284 # If the final value is infinity, no line for it exists.
2330 # Handle the Name property similar to the above. But the file is
2331 # sufficiently different that it is more convenient to make a special
2332 # case for it. It is a combination of the Name, Unicode1_Name, and
2424 diag(sprintf "Range beginning at %04X is out-of-order.", $invlist_ref->[$i]);
2436 diag(sprintf "Range beginning at %04X is out-of-order.", $invlist_ref->[$i]);
2464 # and look for this start and end. If not found is an
2493 # with what the file is.
2510 # Here the map is not more or less directly from a file stored on
2533 diag(sprintf "Range beginning at %04X is out-of-order.", $invlist_ref->[$i]);
2547 # the final element of the loop. If that range is for the default
2548 # value, it shouldn't be closed, as it is to extend to infinity. But
2575 …diag("There is no element [$i] for $prop=$map from prop_invlist(), while [$i] in the implicit one …
2580 …diag("There is no element [$i] from the implicit $prop=$map constructed from prop_invmap(), while …
2585 …licit $prop=$map constructed from prop_invmap() is '$maps{$map}[$i]', and the one from prop_invlis…
2607 is($scripts_map_ref->[$index], "Greek", "U+0390 is Greek");
2609 …is(search_invlist(\@alpha_invlist, ord("\t")), undef, "search_invlist returns undef for code point…
2624 is($count, 2, "MAX_CP isn't too large");