xref: /openbsd-src/gnu/usr.bin/perl/t/uni/class.t (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1BEGIN {
2    chdir 't' if -d 't';
3    @INC = qw(../lib .);
4    require "test.pl";
5}
6
7plan tests => 4784;
8
9sub MyUniClass {
10  <<END;
110030	004F
12END
13}
14
15sub Other::Class {
16  <<END;
170040	005F
18END
19}
20
21sub A::B::Intersection {
22  <<END;
23+main::MyUniClass
24&Other::Class
25END
26}
27
28sub test_regexp ($$) {
29  # test that given string consists of N-1 chars matching $qr1, and 1
30  # char matching $qr2
31  my ($str, $blk) = @_;
32
33  # constructing these objects here makes the last test loop go much faster
34  my $qr1 = qr/(\p{$blk}+)/;
35  if ($str =~ $qr1) {
36    is($1, substr($str, 0, -1));		# all except last char
37  }
38  else {
39    fail('first N-1 chars did not match');
40  }
41
42  my $qr2 = qr/(\P{$blk}+)/;
43  if ($str =~ $qr2) {
44    is($1, substr($str, -1));			# only last char
45  }
46  else {
47    fail('last char did not match');
48  }
49}
50
51use strict;
52
53my $str;
54
55if (ord('A') == 193) {
56    $str = join "", map chr($_), 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, 0xF0 .. 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, 0x7C, 0xC1 .. 0xC9, 0xD1 .. 0xD9, 0xE2 .. 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D, 0x79, 0x81 .. 0x89, 0x91 .. 0x96; # IBM-1047
57} else {
58    $str = join "", map chr($_), 0x20 .. 0x6F;
59}
60
61# make sure it finds built-in class
62is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
63is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
64
65# make sure it finds user-defined class
66is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');
67
68# make sure it finds class in other package
69is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');
70
71# make sure it finds class in other OTHER package
72is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');
73
74# all of these should look in lib/unicore/bc/AL.pl
75$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}";
76is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
77is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}");
78is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
79is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}");
80
81# make sure InGreek works
82$str = "[\x{038B}\x{038C}\x{038D}]";
83
84is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
85is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
86is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
87is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
88is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
89
90use File::Spec;
91my $updir = File::Spec->updir;
92
93# the %utf8::... hashes are already in existence
94# because utf8_pva.pl was run by utf8_heavy.pl
95
96*utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning
97
98no warnings 'utf8'; # we do not want warnings about surrogates etc
99
100sub char_range {
101    my ($h1, $h2) = @_;
102
103    my $str;
104
105    if (ord('A') == 193 && $h1 < 256) {
106	my $h3 = ($h2 || $h1) + 1;
107	if ($h3 - $h1 == 1) {
108	    $str = join "", pack 'U*', $h1 .. $h3; # Using pack since chr doesn't generate Unicode chars for value < 256.
109	} elsif ($h3 - $h1 > 1) {
110	    for (my $i = $h1; $i <= $h3; $i++) {
111		$str = join "", $str, pack 'U*', $i;
112	    }
113	}
114    } else {
115	$str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
116    }
117
118    return $str;
119}
120
121# non-General Category and non-Script
122while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) {
123  my $prop_name = $utf8::PropertyAlias{$abbrev};
124  next unless $prop_name;
125  next if $abbrev eq "gc_sc";
126
127  for (sort keys %$files) {
128    my $filename = File::Spec->catfile(
129      $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl"
130    );
131
132    next unless -e $filename;
133    my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
134
135    my $str = char_range($h1, $h2);
136
137    for my $p ($prop_name, $abbrev) {
138      for my $c ($files->{$_}, $_) {
139        is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1));
140        is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1));
141      }
142    }
143  }
144}
145
146# General Category and Script
147for my $p ('gc', 'sc') {
148  while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) {
149    my $filename = File::Spec->catfile(
150      $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl"
151    );
152
153    next unless -e $filename;
154    my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
155
156    my $str = char_range($h1, $h2);
157
158    for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) {
159      for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) {
160        is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
161        is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
162        SKIP: {
163	  skip("surrogate", 1) if $abbr eq 'cs';
164 	  test_regexp ($str, $y);
165        }
166      }
167    }
168  }
169}
170
171# test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.)
172SKIP:
173{
174  skip "Can't reliably derive class names from file names", 576 if $^O eq 'VMS';
175
176  # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to
177  # return true. Try to work around this by reading the filenames explicitly
178  # to get a case sensitive test.  N.B.  This will fail if filename case is
179  # not preserved because you might go looking for a class name of CF or cf
180  # when you really want Cf.  Storing case sensitive data in filenames is
181  # simply not portable.
182
183  my %files;
184
185  my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc');
186  opendir D, $dirname or die $!;
187  @files{readdir(D)} = ();
188  closedir D;
189
190  for (keys %utf8::PA_reverse) {
191    my $leafname = "$utf8::PA_reverse{$_}.pl";
192    next unless exists $files{$leafname};
193
194    my $filename = File::Spec->catfile($dirname, $leafname);
195
196    my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
197
198    my $str = char_range($h1, $h2);
199
200    for my $x ('gc', 'General Category') {
201      print "# $filename $x $_, $utf8::PA_reverse{$_}\n";
202      for my $y ($_, $utf8::PA_reverse{$_}) {
203	is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
204	is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
205	test_regexp ($str, $y);
206      }
207    }
208  }
209}
210
211# test the blocks (InFoobar)
212for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) {
213  my $filename = File::Spec->catfile(
214    $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl"
215  );
216
217  next unless -e $filename;
218
219  print "# In$_ $filename\n";
220
221  my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
222
223  my $str = char_range($h1, $h2);
224
225  my $blk = $_;
226
227  SKIP: {
228    skip($blk, 2) if $blk =~ /surrogates/i;
229    test_regexp ($str, $blk);
230    $blk =~ s/^In/Block:/;
231    test_regexp ($str, $blk);
232  }
233}
234
235