xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/ext/Encode/t/encoding.t (revision 0:68f95e015346)
1*0Sstevel@tonic-gateBEGIN {
2*0Sstevel@tonic-gate    require Config; import Config;
3*0Sstevel@tonic-gate    if ($Config{'extensions'} !~ /\bEncode\b/) {
4*0Sstevel@tonic-gate      print "1..0 # Skip: Encode was not built\n";
5*0Sstevel@tonic-gate      exit 0;
6*0Sstevel@tonic-gate    }
7*0Sstevel@tonic-gate    unless (find PerlIO::Layer 'perlio') {
8*0Sstevel@tonic-gate	print "1..0 # Skip: PerlIO was not built\n";
9*0Sstevel@tonic-gate	exit 0;
10*0Sstevel@tonic-gate    }
11*0Sstevel@tonic-gate    if (ord("A") == 193) {
12*0Sstevel@tonic-gate	print "1..0 # encoding pragma does not support EBCDIC platforms\n";
13*0Sstevel@tonic-gate	exit(0);
14*0Sstevel@tonic-gate    }
15*0Sstevel@tonic-gate}
16*0Sstevel@tonic-gate
17*0Sstevel@tonic-gateprint "1..31\n";
18*0Sstevel@tonic-gate
19*0Sstevel@tonic-gateuse encoding "latin1"; # ignored (overwritten by the next line)
20*0Sstevel@tonic-gateuse encoding "greek";  # iso 8859-7 (no "latin" alias, surprise...)
21*0Sstevel@tonic-gate
22*0Sstevel@tonic-gate# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is
23*0Sstevel@tonic-gate# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS),
24*0Sstevel@tonic-gate# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S)
25*0Sstevel@tonic-gate
26*0Sstevel@tonic-gate$a = "\xDF";
27*0Sstevel@tonic-gate$b = "\x{100}";
28*0Sstevel@tonic-gate
29*0Sstevel@tonic-gateprint "not " unless ord($a) == 0x3af;
30*0Sstevel@tonic-gateprint "ok 1\n";
31*0Sstevel@tonic-gate
32*0Sstevel@tonic-gateprint "not " unless ord($b) == 0x100;
33*0Sstevel@tonic-gateprint "ok 2\n";
34*0Sstevel@tonic-gate
35*0Sstevel@tonic-gatemy $c;
36*0Sstevel@tonic-gate
37*0Sstevel@tonic-gate$c = $a . $b;
38*0Sstevel@tonic-gate
39*0Sstevel@tonic-gateprint "not " unless ord($c) == 0x3af;
40*0Sstevel@tonic-gateprint "ok 3\n";
41*0Sstevel@tonic-gate
42*0Sstevel@tonic-gateprint "not " unless length($c) == 2;
43*0Sstevel@tonic-gateprint "ok 4\n";
44*0Sstevel@tonic-gate
45*0Sstevel@tonic-gateprint "not " unless ord(substr($c, 1, 1)) == 0x100;
46*0Sstevel@tonic-gateprint "ok 5\n";
47*0Sstevel@tonic-gate
48*0Sstevel@tonic-gateprint "not " unless ord(chr(0xdf)) == 0x3af; # spooky
49*0Sstevel@tonic-gateprint "ok 6\n";
50*0Sstevel@tonic-gate
51*0Sstevel@tonic-gateprint "not " unless ord(pack("C", 0xdf)) == 0x3af;
52*0Sstevel@tonic-gateprint "ok 7\n";
53*0Sstevel@tonic-gate
54*0Sstevel@tonic-gate# we didn't break pack/unpack, I hope
55*0Sstevel@tonic-gate
56*0Sstevel@tonic-gateprint "not " unless unpack("C", pack("C", 0xdf)) == 0xdf;
57*0Sstevel@tonic-gateprint "ok 8\n";
58*0Sstevel@tonic-gate
59*0Sstevel@tonic-gate# the first octet of UTF-8 encoded 0x3af
60*0Sstevel@tonic-gateprint "not " unless unpack("C", chr(0xdf)) == 0xce;
61*0Sstevel@tonic-gateprint "ok 9\n";
62*0Sstevel@tonic-gate
63*0Sstevel@tonic-gateprint "not " unless unpack("U", pack("U", 0xdf)) == 0xdf;
64*0Sstevel@tonic-gateprint "ok 10\n";
65*0Sstevel@tonic-gate
66*0Sstevel@tonic-gateprint "not " unless unpack("U", chr(0xdf)) == 0x3af;
67*0Sstevel@tonic-gateprint "ok 11\n";
68*0Sstevel@tonic-gate
69*0Sstevel@tonic-gate# charnames must still work
70*0Sstevel@tonic-gateuse charnames ':full';
71*0Sstevel@tonic-gateprint "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf;
72*0Sstevel@tonic-gateprint "ok 12\n";
73*0Sstevel@tonic-gate
74*0Sstevel@tonic-gate# combine
75*0Sstevel@tonic-gate
76*0Sstevel@tonic-gate$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf);
77*0Sstevel@tonic-gate
78*0Sstevel@tonic-gateprint "not " unless ord($c) == 0x3af;
79*0Sstevel@tonic-gateprint "ok 13\n";
80*0Sstevel@tonic-gate
81*0Sstevel@tonic-gateprint "not " unless ord(substr($c, 1, 1)) == 0xdf;
82*0Sstevel@tonic-gateprint "ok 14\n";
83*0Sstevel@tonic-gate
84*0Sstevel@tonic-gateprint "not " unless ord(substr($c, 2, 1)) == 0x3af;
85*0Sstevel@tonic-gateprint "ok 15\n";
86*0Sstevel@tonic-gate
87*0Sstevel@tonic-gate# regex literals
88*0Sstevel@tonic-gate
89*0Sstevel@tonic-gateprint "not " unless "\xDF"    =~ /\x{3AF}/;
90*0Sstevel@tonic-gateprint "ok 16\n";
91*0Sstevel@tonic-gate
92*0Sstevel@tonic-gateprint "not " unless "\x{3AF}" =~ /\xDF/;
93*0Sstevel@tonic-gateprint "ok 17\n";
94*0Sstevel@tonic-gate
95*0Sstevel@tonic-gateprint "not " unless "\xDF"    =~ /\xDF/;
96*0Sstevel@tonic-gateprint "ok 18\n";
97*0Sstevel@tonic-gate
98*0Sstevel@tonic-gateprint "not " unless "\x{3AF}" =~ /\x{3AF}/;
99*0Sstevel@tonic-gateprint "ok 19\n";
100*0Sstevel@tonic-gate
101*0Sstevel@tonic-gate# eq, cmp
102*0Sstevel@tonic-gate
103*0Sstevel@tonic-gatemy ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = (
104*0Sstevel@tonic-gate    pack("C*", 0xDF ),       # byte
105*0Sstevel@tonic-gate    pack("C*", 0xDF, 0x20),  # ($bytes2 cmp $U) > 0
106*0Sstevel@tonic-gate    pack("U*", 0x3AF),       # $U eq $byte
107*0Sstevel@tonic-gate    pack("U*", 0xDF ),       # $Ub would eq $bytev w/o use encoding
108*0Sstevel@tonic-gate    pack("U*", 0x3B1),       # ($g1 cmp $byte) > 0; === chr(0xe1)
109*0Sstevel@tonic-gate    pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0;
110*0Sstevel@tonic-gate    pack("U*", 0x3AB),       # ($l  cmp $byte) < 0; === chr(0xdb)
111*0Sstevel@tonic-gate);
112*0Sstevel@tonic-gate
113*0Sstevel@tonic-gate# all the tests in this section that compare a byte encoded string
114*0Sstevel@tonic-gate# ato UTF-8 encoded are run in all possible vairants
115*0Sstevel@tonic-gate# all of the eq, ne, cmp operations tested,
116*0Sstevel@tonic-gate# $v z $u tested as well as $u z $v
117*0Sstevel@tonic-gate
118*0Sstevel@tonic-gatesub alleq($$){
119*0Sstevel@tonic-gate    my ($a,$b)    =    (shift, shift);
120*0Sstevel@tonic-gate     $a  eq  $b        &&     $b  eq  $a         &&
121*0Sstevel@tonic-gate  !( $a  ne  $b )      &&  !( $b  ne  $a )       &&
122*0Sstevel@tonic-gate   ( $a  cmp $b ) == 0 &&   ( $b  cmp $a ) == 0;
123*0Sstevel@tonic-gate}
124*0Sstevel@tonic-gate
125*0Sstevel@tonic-gatesub anyeq($$){
126*0Sstevel@tonic-gate    my ($a,$b)    =    (shift, shift);
127*0Sstevel@tonic-gate     $a  eq  $b        ||     $b  eq  $a         ||
128*0Sstevel@tonic-gate  !( $a  ne  $b )      ||  !( $b  ne  $a )       ||
129*0Sstevel@tonic-gate   ( $a  cmp $b ) == 0 ||   ( $b  cmp $a ) == 0;
130*0Sstevel@tonic-gate}
131*0Sstevel@tonic-gate
132*0Sstevel@tonic-gatesub allgt($$){
133*0Sstevel@tonic-gate    my ($a,$b)    =    (shift, shift);
134*0Sstevel@tonic-gate    ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1;
135*0Sstevel@tonic-gate}
136*0Sstevel@tonic-gate#match the correct UTF-8 string
137*0Sstevel@tonic-gateprint "not " unless  alleq($byte, $U);
138*0Sstevel@tonic-gateprint "ok 20\n";
139*0Sstevel@tonic-gate
140*0Sstevel@tonic-gate#do not match a wrong UTF-8 string
141*0Sstevel@tonic-gateprint "not " if anyeq($byte, $Ub);
142*0Sstevel@tonic-gateprint "ok 21\n";
143*0Sstevel@tonic-gate
144*0Sstevel@tonic-gate#string ordering
145*0Sstevel@tonic-gateprint "not " unless allgt ( $g1,    $byte  )  &&
146*0Sstevel@tonic-gate                    allgt ( $g2,    $byte  )  &&
147*0Sstevel@tonic-gate                    allgt ( $byte,  $l     )  &&
148*0Sstevel@tonic-gate                    allgt ( $bytes, $U     );
149*0Sstevel@tonic-gateprint "ok 22\n";
150*0Sstevel@tonic-gate
151*0Sstevel@tonic-gate# upgrade, downgrade
152*0Sstevel@tonic-gate
153*0Sstevel@tonic-gatemy ($u,$v,$v2);
154*0Sstevel@tonic-gate$u = $v = $v2 = pack("C*", 0xDF);
155*0Sstevel@tonic-gateutf8::upgrade($v);                   #explicit upgrade
156*0Sstevel@tonic-gate$v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade
157*0Sstevel@tonic-gate
158*0Sstevel@tonic-gate# implicit upgrade === explicit upgrade
159*0Sstevel@tonic-gateprint "not "  if do{{use bytes; $v ne $v2}} || $v ne $v2;
160*0Sstevel@tonic-gateprint "ok 23\n";
161*0Sstevel@tonic-gate
162*0Sstevel@tonic-gate# utf8::upgrade is transparent and does not break equality
163*0Sstevel@tonic-gateprint "not " unless alleq( $u, $v );
164*0Sstevel@tonic-gateprint "ok 24\n";
165*0Sstevel@tonic-gate
166*0Sstevel@tonic-gate$u = $v = pack("C*", 0xDF);
167*0Sstevel@tonic-gateutf8::upgrade($v);
168*0Sstevel@tonic-gate#test for a roundtrip, we should get back from where we left
169*0Sstevel@tonic-gateeval {utf8::downgrade( $v )};
170*0Sstevel@tonic-gateprint "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v;
171*0Sstevel@tonic-gateprint "ok 25\n";
172*0Sstevel@tonic-gate
173*0Sstevel@tonic-gate# some more eq, cmp
174*0Sstevel@tonic-gate
175*0Sstevel@tonic-gate$byte=pack("C*", 0xDF);
176*0Sstevel@tonic-gate
177*0Sstevel@tonic-gateprint "not " unless pack("U*", 0x3AF) eq $byte;
178*0Sstevel@tonic-gateprint "ok 26\n";
179*0Sstevel@tonic-gate
180*0Sstevel@tonic-gateprint "not " if chr(0xDF) cmp $byte;
181*0Sstevel@tonic-gateprint "ok 27\n";
182*0Sstevel@tonic-gate
183*0Sstevel@tonic-gateprint "not " unless ((pack("U*", 0x3B0)       cmp $byte) ==  1) &&
184*0Sstevel@tonic-gate                    ((pack("U*", 0x3AE)       cmp $byte) == -1) &&
185*0Sstevel@tonic-gate                    ((pack("U*", 0x3AF, 0x20) cmp $byte) ==  1) &&
186*0Sstevel@tonic-gate	            ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1);
187*0Sstevel@tonic-gateprint "ok 28\n";
188*0Sstevel@tonic-gate
189*0Sstevel@tonic-gate
190*0Sstevel@tonic-gate{
191*0Sstevel@tonic-gate    # Used to core dump in 5.7.3
192*0Sstevel@tonic-gate    no warnings; # so test goes noiselessly
193*0Sstevel@tonic-gate    print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n";
194*0Sstevel@tonic-gate}
195*0Sstevel@tonic-gate
196*0Sstevel@tonic-gate{
197*0Sstevel@tonic-gate	my %h1;
198*0Sstevel@tonic-gate	my %h2;
199*0Sstevel@tonic-gate	$h1{"\xdf"}    = 41;
200*0Sstevel@tonic-gate	$h2{"\x{3af}"} = 42;
201*0Sstevel@tonic-gate	print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n";
202*0Sstevel@tonic-gate	print $h2{"\xdf"}    == 42 ? "ok 31\n" : "not ok 31\n";
203*0Sstevel@tonic-gate}
204