xref: /openbsd-src/gnu/usr.bin/perl/lib/charnames.t (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1#!./perl
2
3my @WARN;
4
5BEGIN {
6    unless(grep /blib/, @INC) {
7	chdir 't' if -d 't';
8	@INC = '../lib';
9	require './test.pl';
10    }
11    $SIG{__WARN__} = sub { push @WARN, @_ };
12}
13
14require File::Spec;
15
16$| = 1;
17
18print "1..80\n";
19
20use charnames ':full';
21
22print "not " unless "Here\N{EXCLAMATION MARK}?" eq "Here!?";
23print "ok 1\n";
24
25{
26  use bytes;			# TEST -utf8 can switch utf8 on
27
28  print "# \$res=$res \$\@='$@'\nnot "
29    if $res = eval <<'EOE'
30use charnames ":full";
31"Here: \N{CYRILLIC SMALL LETTER BE}!";
321
33EOE
34      or $@ !~ /above 0xFF/;
35  print "ok 2\n";
36  # print "# \$res=$res \$\@='$@'\n";
37
38  print "# \$res=$res \$\@='$@'\nnot "
39    if $res = eval <<'EOE'
40use charnames 'cyrillic';
41"Here: \N{Be}!";
421
43EOE
44      or $@ !~ /CYRILLIC CAPITAL LETTER BE.*above 0xFF/;
45  print "ok 3\n";
46}
47
48# If octal representation of unicode char is \0xyzt, then the utf8 is \3xy\2zt
49if (ord('A') == 65) { # as on ASCII or UTF-8 machines
50    $encoded_be = "\320\261";
51    $encoded_alpha = "\316\261";
52    $encoded_bet = "\327\221";
53    $encoded_deseng = "\360\220\221\215";
54}
55else { # EBCDIC where UTF-EBCDIC may be used (this may be 1047 specific since
56       # UTF-EBCDIC is codepage specific)
57    $encoded_be = "\270\102\130";
58    $encoded_alpha = "\264\130";
59    $encoded_bet = "\270\125\130";
60    $encoded_deseng = "\336\102\103\124";
61}
62
63sub to_bytes {
64    unpack"U0a*", shift;
65}
66
67{
68  use charnames ':full';
69
70  print "not " unless to_bytes("\N{CYRILLIC SMALL LETTER BE}") eq $encoded_be;
71  print "ok 4\n";
72
73  use charnames qw(cyrillic greek :short);
74
75  print "not " unless to_bytes("\N{be},\N{alpha},\N{hebrew:bet}")
76    eq "$encoded_be,$encoded_alpha,$encoded_bet";
77  print "ok 5\n";
78}
79
80{
81    use charnames ':full';
82    print "not " unless "\x{263a}" eq "\N{WHITE SMILING FACE}";
83    print "ok 6\n";
84    print "not " unless length("\x{263a}") == 1;
85    print "ok 7\n";
86    print "not " unless length("\N{WHITE SMILING FACE}") == 1;
87    print "ok 8\n";
88    print "not " unless sprintf("%vx", "\x{263a}") eq "263a";
89    print "ok 9\n";
90    print "not " unless sprintf("%vx", "\N{WHITE SMILING FACE}") eq "263a";
91    print "ok 10\n";
92    print "not " unless sprintf("%vx", "\xFF\N{WHITE SMILING FACE}") eq "ff.263a";
93    print "ok 11\n";
94    print "not " unless sprintf("%vx", "\x{ff}\N{WHITE SMILING FACE}") eq "ff.263a";
95    print "ok 12\n";
96}
97
98{
99   use charnames qw(:full);
100   use utf8;
101
102    my $x = "\x{221b}";
103    my $named = "\N{CUBE ROOT}";
104
105    print "not " unless ord($x) == ord($named);
106    print "ok 13\n";
107}
108
109{
110   use charnames qw(:full);
111   use utf8;
112   print "not " unless "\x{100}\N{CENT SIGN}" eq "\x{100}"."\N{CENT SIGN}";
113   print "ok 14\n";
114}
115
116{
117  use charnames ':full';
118
119  print "not "
120      unless to_bytes("\N{DESERET SMALL LETTER ENG}") eq $encoded_deseng;
121  print "ok 15\n";
122}
123
124{
125  # 20001114.001
126
127  no utf8; # naked Latin-1
128
129  if (ord("�") == 0xc4) { # Try to do this only on Latin-1.
130      use charnames ':full';
131      my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}";
132      print "not " unless $text eq "\xc4" && ord($text) == 0xc4;
133      print "ok 16\n";
134  } else {
135      print "ok 16 # Skip: not Latin-1\n";
136  }
137}
138
139{
140    print "not " unless charnames::viacode(0x1234) eq "ETHIOPIC SYLLABLE SEE";
141    print "ok 17\n";
142
143    # Unused Hebrew.
144    print "not " if defined charnames::viacode(0x0590);
145    print "ok 18\n";
146}
147
148{
149    print "not " unless
150	sprintf("%04X", charnames::vianame("GOTHIC LETTER AHSA")) eq "10330";
151    print "ok 19\n";
152
153    print "not " if
154	defined charnames::vianame("NONE SUCH");
155    print "ok 20\n";
156}
157
158{
159    # check that caching at least hasn't broken anything
160
161    print "not " unless charnames::viacode(0x1234) eq "ETHIOPIC SYLLABLE SEE";
162    print "ok 21\n";
163
164    print "not " unless
165	sprintf("%04X", charnames::vianame("GOTHIC LETTER AHSA")) eq "10330";
166    print "ok 22\n";
167
168}
169
170print "not " unless "\N{CHARACTER TABULATION}" eq "\t";
171print "ok 23\n";
172
173print "not " unless "\N{ESCAPE}" eq "\e";
174print "ok 24\n";
175
176print "not " unless "\N{NULL}" eq "\c@";
177print "ok 25\n";
178
179print "not " unless "\N{LINE FEED (LF)}" eq "\n";
180print "ok 26\n";
181
182print "not " unless "\N{LINE FEED}" eq "\n";
183print "ok 27\n";
184
185print "not " unless "\N{LF}" eq "\n";
186print "ok 28\n";
187
188my $nel = ord("A") == 193 ? qr/^(?:\x15|\x25)$/ : qr/^\x85$/;
189
190print "not " unless "\N{NEXT LINE (NEL)}" =~ $nel;
191print "ok 29\n";
192
193print "not " unless "\N{NEXT LINE}" =~ $nel;
194print "ok 30\n";
195
196print "not " unless "\N{NEL}" =~ $nel;
197print "ok 31\n";
198
199print "not " unless "\N{BYTE ORDER MARK}" eq chr(0xFEFF);
200print "ok 32\n";
201
202print "not " unless "\N{BOM}" eq chr(0xFEFF);
203print "ok 33\n";
204
205{
206    use warnings 'deprecated';
207
208    print "not " unless "\N{HORIZONTAL TABULATION}" eq "\t";
209    print "ok 34\n";
210
211    print "not " unless grep { /"HORIZONTAL TABULATION" is deprecated/ } @WARN;
212    print "ok 35\n";
213
214    no warnings 'deprecated';
215
216    print "not " unless "\N{VERTICAL TABULATION}" eq "\013";
217    print "ok 36\n";
218
219    print "not " if grep { /"VERTICAL TABULATION" is deprecated/ } @WARN;
220    print "ok 37\n";
221}
222
223print "not " unless charnames::viacode(0xFEFF) eq "ZERO WIDTH NO-BREAK SPACE";
224print "ok 38\n";
225
226{
227    use warnings;
228    print "not " unless ord("\N{BOM}") == 0xFEFF;
229    print "ok 39\n";
230}
231
232print "not " unless ord("\N{ZWNJ}") == 0x200C;
233print "ok 40\n";
234
235print "not " unless ord("\N{ZWJ}") == 0x200D;
236print "ok 41\n";
237
238print "not " unless "\N{U+263A}" eq "\N{WHITE SMILING FACE}";
239print "ok 42\n";
240
241{
242    print "not " unless
243	0x3093 == charnames::vianame("HIRAGANA LETTER N");
244    print "ok 43\n";
245
246    print "not " unless
247	0x0397 == charnames::vianame("GREEK CAPITAL LETTER ETA");
248    print "ok 44\n";
249}
250
251print "not " if defined charnames::viacode(0x110000);
252print "ok 45\n";
253
254print "not " if grep { /you asked for U+110000/ } @WARN;
255print "ok 46\n";
256
257
258# ---- Alias extensions
259
260my $alifile = File::Spec->catfile(File::Spec->updir, qw(lib unicore xyzzy_alias.pl));
261my $i = 0;
262
263my @prgs;
264{   local $/ = undef;
265    @prgs = split "\n########\n", <DATA>;
266    }
267
268my $i = 46;
269for (@prgs) {
270    my ($code, $exp) = ((split m/\nEXPECT\n/), '$');
271    my ($prog, $fil) = ((split m/\nFILE\n/, $code), "");
272    my $tmpfile = tempfile();
273    open my $tmp, "> $tmpfile" or die "Could not open $tmpfile: $!";
274    print $tmp $prog, "\n";
275    close $tmp or die "Could not close $tmpfile: $!";
276    if ($fil) {
277	$fil .= "\n";
278	open my $ali, "> $alifile" or die "Could not open $alifile: $!";
279	print $ali $fil;
280	close $ali or die "Could not close $alifile: $!";
281	}
282    my $res = runperl( switches => $switch,
283                       progfile => $tmpfile,
284                       stderr => 1 );
285    my $status = $?;
286    $res =~ s/[\r\n]+$//;
287    $res =~ s/tmp\d+/-/g;			# fake $prog from STDIN
288    $res =~ s/\n%[A-Z]+-[SIWEF]-.*$//		# clip off DCL status msg
289	if $^O eq "VMS";
290    $exp =~ s/[\r\n]+$//;
291    my $pfx = ($res =~ s/^PREFIX\n//);
292    my $rexp = qr{^$exp};
293    if ($res =~ s/^SKIPPED\n//) {
294	print "$results\n";
295	}
296    elsif (($pfx and $res !~ /^\Q$expected/) or
297	  (!$pfx and $res !~ $rexp)) {
298        print STDERR
299	    "PROG:\n$prog\n",
300	    "FILE:\n$fil",
301	    "EXPECTED:\n$exp\n",
302	    "GOT:\n$res\n";
303        print "not ";
304	}
305    print "ok ", ++$i, "\n";
306    $fil or next;
307    1 while unlink $alifile;
308    }
309
310# [perl #30409] charnames.pm clobbers default variable
311$_ = 'foobar';
312eval "use charnames ':full';";
313print "not " unless $_ eq 'foobar';
314print "ok 74\n";
315
316# Unicode slowdown noted by Phil Pennock, traced to a bug fix in index
317# SADAHIRO Tomoyuki's suggestion is to ensure that the UTF-8ness of both
318# arguments are indentical before calling index.
319# To do this can take advantage of the fact that unicore/Name.pl is 7 bit
320# (or at least should be). So assert that that it's true here.
321
322my $names = do "unicore/Name.pl";
323print defined $names ? "ok 75\n" : "not ok 75\n";
324if (ord('A') == 65) { # as on ASCII or UTF-8 machines
325  my $non_ascii = $names =~ tr/\0-\177//c;
326  print $non_ascii ? "not ok 76 # $non_ascii\n" : "ok 76\n";
327} else {
328  print "ok 76\n";
329}
330
331# Verify that charnames propagate to eval("")
332my $evaltry = eval q[ "Eval: \N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}" ];
333if ($@) {
334    print "# $@not ok 77\nnot ok 78\n";
335} else {
336    print "ok 77\n";
337    print "not " unless $evaltry eq "Eval: \N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}";
338    print "ok 78\n";
339}
340
341# Verify that db includes the normative NameAliases.txt names
342print "not " unless "\N{U+1D0C5}" eq "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}";
343print "ok 79\n";
344
345# [perl #73174] use of \N{FOO} used to reset %^H
346
347{
348    use charnames ":full";
349    my $res;
350    BEGIN { $^H{73174} = "foo" }
351    BEGIN { $res = ($^H{73174} // "") }
352    # forces loading of utf8.pm, which used to reset %^H
353    $res .= '-1' if ":" =~ /\N{COLON}/i;
354    BEGIN { $res .= '-' . ($^H{73174} // "") }
355    $res .= '-' . ($^H{73174} // "");
356    $res .= '-2' if ":" =~ /\N{COLON}/;
357    $res .= '-3' if ":" =~ /\N{COLON}/i;
358    print $res eq "foo-foo-1--2-3" ? "" : "not ",
359	"ok 80 - \$^H{foo} correct after /\\N{bar}/i (res=$res)\n";
360}
361
362__END__
363# unsupported pragma
364use charnames ":scoobydoo";
365"Here: \N{e_ACUTE}!\n";
366EXPECT
367unsupported special ':scoobydoo' in charnames at
368########
369# wrong type of alias (missing colon)
370use charnames "alias";
371"Here: \N{e_ACUTE}!\n";
372EXPECT
373Unknown charname 'e_ACUTE' at
374########
375# alias without an argument
376use charnames ":alias";
377"Here: \N{e_ACUTE}!\n";
378EXPECT
379:alias needs an argument in charnames at
380########
381# reversed sequence
382use charnames ":alias" => ":full";
383"Here: \N{e_ACUTE}!\n";
384EXPECT
385:alias cannot use existing pragma :full \(reversed order\?\) at
386########
387# alias with hashref but no :full
388use charnames ":alias" => { e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE" };
389"Here: \N{e_ACUTE}!\n";
390EXPECT
391Unknown charname 'LATIN SMALL LETTER E WITH ACUTE' at
392########
393# alias with hashref but with :short
394use charnames ":short", ":alias" => { e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE" };
395"Here: \N{e_ACUTE}!\n";
396EXPECT
397Unknown charname 'LATIN SMALL LETTER E WITH ACUTE' at
398########
399# alias with hashref to :full OK
400use charnames ":full", ":alias" => { e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE" };
401"Here: \N{e_ACUTE}!\n";
402EXPECT
403$
404########
405# alias with hashref to :short but using :full
406use charnames ":full", ":alias" => { e_ACUTE => "LATIN:e WITH ACUTE" };
407"Here: \N{e_ACUTE}!\n";
408EXPECT
409Unknown charname 'LATIN:e WITH ACUTE' at
410########
411# alias with hashref to :short OK
412use charnames ":short", ":alias" => { e_ACUTE => "LATIN:e WITH ACUTE" };
413"Here: \N{e_ACUTE}!\n";
414EXPECT
415$
416########
417# alias with bad hashref
418use charnames ":short", ":alias" => "e_ACUTE";
419"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
420EXPECT
421unicore/e_ACUTE_alias.pl cannot be used as alias file for charnames at
422########
423# alias with arrayref
424use charnames ":short", ":alias" => [ e_ACUTE => "LATIN:e WITH ACUTE" ];
425"Here: \N{e_ACUTE}!\n";
426EXPECT
427Only HASH reference supported as argument to :alias at
428########
429# alias with bad hashref
430use charnames ":short", ":alias" => { e_ACUTE => "LATIN:e WITH ACUTE", "a_ACUTE" };
431"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
432EXPECT
433Use of uninitialized value
434########
435# alias with hashref two aliases
436use charnames ":short", ":alias" => {
437    e_ACUTE => "LATIN:e WITH ACUTE",
438    a_ACUTE => "",
439    };
440"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
441EXPECT
442Unknown charname '' at
443########
444# alias with hashref two aliases
445use charnames ":short", ":alias" => {
446    e_ACUTE => "LATIN:e WITH ACUTE",
447    a_ACUTE => "LATIN:a WITH ACUTE",
448    };
449"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
450EXPECT
451$
452########
453# alias with hashref using mixed aliasses
454use charnames ":short", ":alias" => {
455    e_ACUTE => "LATIN:e WITH ACUTE",
456    a_ACUTE => "LATIN SMALL LETTER A WITH ACUT",
457    };
458"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
459EXPECT
460Unknown charname 'LATIN SMALL LETTER A WITH ACUT' at
461########
462# alias with hashref using mixed aliasses
463use charnames ":short", ":alias" => {
464    e_ACUTE => "LATIN:e WITH ACUTE",
465    a_ACUTE => "LATIN SMALL LETTER A WITH ACUTE",
466    };
467"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
468EXPECT
469Unknown charname 'LATIN SMALL LETTER A WITH ACUTE' at
470########
471# alias with hashref using mixed aliasses
472use charnames ":full", ":alias" => {
473    e_ACUTE => "LATIN:e WITH ACUTE",
474    a_ACUTE => "LATIN SMALL LETTER A WITH ACUTE",
475    };
476"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
477EXPECT
478Unknown charname 'LATIN:e WITH ACUTE' at
479########
480# alias with nonexisting file
481use charnames ":full", ":alias" => "xyzzy";
482"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
483EXPECT
484unicore/xyzzy_alias.pl cannot be used as alias file for charnames at
485########
486# alias with bad file name
487use charnames ":full", ":alias" => "xy 7-";
488"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
489EXPECT
490Charnames alias files can only have identifier characters at
491########
492# alias with non_absolute (existing) file name (which it should /not/ use)
493use charnames ":full", ":alias" => "perl";
494"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
495EXPECT
496unicore/perl_alias.pl cannot be used as alias file for charnames at
497########
498# alias with bad file
499use charnames ":full", ":alias" => "xyzzy";
500"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
501FILE
502#!perl
5030;
504EXPECT
505unicore/xyzzy_alias.pl did not return a \(valid\) list of alias pairs at
506########
507# alias with file with empty list
508use charnames ":full", ":alias" => "xyzzy";
509"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
510FILE
511#!perl
512();
513EXPECT
514Unknown charname 'e_ACUTE' at
515########
516# alias with file OK but file has :short aliasses
517use charnames ":full", ":alias" => "xyzzy";
518"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
519FILE
520#!perl
521(   e_ACUTE => "LATIN:e WITH ACUTE",
522    a_ACUTE => "LATIN:a WITH ACUTE",
523    );
524EXPECT
525Unknown charname 'LATIN:e WITH ACUTE' at
526########
527# alias with :short and file OK
528use charnames ":short", ":alias" => "xyzzy";
529"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
530FILE
531#!perl
532(   e_ACUTE => "LATIN:e WITH ACUTE",
533    a_ACUTE => "LATIN:a WITH ACUTE",
534    );
535EXPECT
536$
537########
538# alias with :short and file OK has :long aliasses
539use charnames ":short", ":alias" => "xyzzy";
540"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
541FILE
542#!perl
543(   e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE",
544    a_ACUTE => "LATIN SMALL LETTER A WITH ACUTE",
545    );
546EXPECT
547Unknown charname 'LATIN SMALL LETTER E WITH ACUTE' at
548########
549# alias with file implicit :full but file has :short aliasses
550use charnames ":alias" => ":xyzzy";
551"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
552FILE
553#!perl
554(   e_ACUTE => "LATIN:e WITH ACUTE",
555    a_ACUTE => "LATIN:a WITH ACUTE",
556    );
557EXPECT
558Unknown charname 'LATIN:e WITH ACUTE' at
559########
560# alias with file implicit :full and file has :long aliasses
561use charnames ":alias" => ":xyzzy";
562"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
563FILE
564#!perl
565(   e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE",
566    a_ACUTE => "LATIN SMALL LETTER A WITH ACUTE",
567    );
568EXPECT
569$
570