xref: /openbsd-src/gnu/usr.bin/perl/t/lib/warnings/utf8 (revision e5157e49389faebcb42b7237d55fbf096d9c2523)
1
2  utf8.c AOK
3
4     [utf8_to_uvchr_buf]
5     Malformed UTF-8 character
6	my $a = ord "\x80" ;
7
8     Malformed UTF-8 character
9	my $a = ord "\xf080" ;
10     <<<<<< this warning can't be easily triggered from perl anymore
11
12     [utf16_to_utf8]
13     Malformed UTF-16 surrogate
14     <<<<<< Add a test when something actually calls utf16_to_utf8
15
16__END__
# NAME malformed UTF-8 in a source literal warns unless 'utf8' warnings are disabled
17# utf8.c [utf8_to_uvchr_buf] -W
18BEGIN {
19    if (ord('A') == 193) {  # taken only on EBCDIC platforms, per the SKIPPED message below
20        print "SKIPPED\n# ebcdic platforms do not generate Malformed UTF-8 warnings.";
21        exit 0;
22    }
23}
24use utf8 ;
25my $a = "sn�storm" ;  # program line 9: first warning listed in EXPECT
26{
27    no warnings 'utf8' ;
28    my $a = "sn�storm";  # same literal, no warning expected for this line
29    use warnings 'utf8' ;
30    my $a = "sn�storm";  # program line 14: second warning listed in EXPECT
31}
32EXPECT
33Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9.
34Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14.
35########
# NAME uc() warns for surrogates and above-Unicode code points only under 'utf8' warnings
36use warnings 'utf8';
37my $d7ff  = uc(chr(0xD7FF));
38my $d800  = uc(chr(0xD800));  # program line 3: surrogate warning in EXPECT
39my $dfff  = uc(chr(0xDFFF));  # program line 4: surrogate warning in EXPECT
40my $e000  = uc(chr(0xE000));
41my $feff  = uc(chr(0xFEFF));
42my $fffd  = uc(chr(0xFFFD));
43my $fffe  = uc(chr(0xFFFE));
44my $ffff  = uc(chr(0xFFFF));
45my $hex4  = uc(chr(0x10000));
46my $hex5  = uc(chr(0x100000));
47my $maxm1 = uc(chr(0x10FFFE));
48my $max   = uc(chr(0x10FFFF));
49my $nonUnicode =  uc(chr(0x110000));  # program line 14: non-Unicode warning in EXPECT
50no warnings 'utf8';  # the same calls follow; EXPECT lists nothing for them
51my $d7ff  = uc(chr(0xD7FF));
52my $d800  = uc(chr(0xD800));
53my $dfff  = uc(chr(0xDFFF));
54my $e000  = uc(chr(0xE000));
55my $feff  = uc(chr(0xFEFF));
56my $fffd  = uc(chr(0xFFFD));
57my $fffe  = uc(chr(0xFFFE));
58my $ffff  = uc(chr(0xFFFF));
59my $hex4  = uc(chr(0x10000));
60my $hex5  = uc(chr(0x100000));
61my $maxm1 = uc(chr(0x10FFFE));
62my $max   = uc(chr(0x10FFFF));
63my $nonUnicode =  uc(chr(0x110000));
64EXPECT
65Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
66Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
67Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14.
68########
# NAME uc(): no warnings 'surrogate' silences only the surrogate warning
69use warnings 'utf8';
70my $d800  = uc(chr(0xD800));  # program line 2: warns
71my $nonUnicode =  uc(chr(0x110000));  # program line 3: warns
72no warnings 'surrogate';
73my $d800  = uc(chr(0xD800));  # absent from EXPECT
74my $nonUnicode =  uc(chr(0x110000));  # program line 6: still warns
75EXPECT
76Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
77Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
78Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6.
79########
# NAME uc(): no warnings 'non_unicode' silences only the non-Unicode warnings
80use warnings 'utf8';
81my $d800  = uc(chr(0xD800));  # program line 2: warns
82my $nonUnicode =  uc(chr(0x110000));  # program line 3: warns
83my $big_nonUnicode = uc(chr(0x8000_0000));  # program line 4: warns
84no warnings 'non_unicode';
85my $d800  = uc(chr(0xD800));  # program line 6: still warns
86my $nonUnicode =  uc(chr(0x110000));  # absent from EXPECT
87my $big_nonUnicode = uc(chr(0x8000_0000));  # absent from EXPECT
88EXPECT
89Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
90Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
91Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 4.
92Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 6.
93########
# NAME lc() on pack("U") strings warns for surrogates and above-Unicode code points only under 'utf8' warnings
94use warnings 'utf8';
95my $d7ff  = lc pack("U", 0xD7FF);
96my $d800  = lc pack("U", 0xD800);  # program line 3: surrogate warning in EXPECT
97my $dfff  = lc pack("U", 0xDFFF);  # program line 4: surrogate warning in EXPECT
98my $e000  = lc pack("U", 0xE000);
99my $feff  = lc pack("U", 0xFEFF);
100my $fffd  = lc pack("U", 0xFFFD);
101my $fffe  = lc pack("U", 0xFFFE);
102my $ffff  = lc pack("U", 0xFFFF);
103my $hex4  = lc pack("U", 0x10000);
104my $hex5  = lc pack("U", 0x100000);
105my $maxm1 = lc pack("U", 0x10FFFE);
106my $max   = lc pack("U", 0x10FFFF);
107my $nonUnicode =  lc(pack("U", 0x110000));  # program line 14: non-Unicode warning in EXPECT
108no warnings 'utf8';  # the same calls follow; EXPECT lists nothing for them
109my $d7ff  = lc pack("U", 0xD7FF);
110my $d800  = lc pack("U", 0xD800);
111my $dfff  = lc pack("U", 0xDFFF);
112my $e000  = lc pack("U", 0xE000);
113my $feff  = lc pack("U", 0xFEFF);
114my $fffd  = lc pack("U", 0xFFFD);
115my $fffe  = lc pack("U", 0xFFFE);
116my $ffff  = lc pack("U", 0xFFFF);
117my $hex4  = lc pack("U", 0x10000);
118my $hex5  = lc pack("U", 0x100000);
119my $maxm1 = lc pack("U", 0x10FFFE);
120my $max   = lc pack("U", 0x10FFFF);
121my $nonUnicode =  lc(pack("U", 0x110000));
122EXPECT
123Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
124Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
125Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14.
126########
# NAME ucfirst() on \x{} literals warns for surrogates and above-Unicode code points only under 'utf8' warnings
127use warnings 'utf8';
128my $d7ff  = ucfirst "\x{D7FF}";
129my $d800  = ucfirst "\x{D800}";  # program line 3: surrogate warning in EXPECT
130my $dfff  = ucfirst "\x{DFFF}";  # program line 4: surrogate warning in EXPECT
131my $e000  = ucfirst "\x{E000}";
132my $feff  = ucfirst "\x{FEFF}";
133my $fffd  = ucfirst "\x{FFFD}";
134my $fffe  = ucfirst "\x{FFFE}";
135my $ffff  = ucfirst "\x{FFFF}";
136my $hex4  = ucfirst "\x{10000}";
137my $hex5  = ucfirst "\x{100000}";
138my $maxm1 = ucfirst "\x{10FFFE}";
139my $max   = ucfirst "\x{10FFFF}";
140my $nonUnicode =  ucfirst "\x{110000}";  # program line 14: non-Unicode warning in EXPECT
141no warnings 'utf8';  # the same calls follow; EXPECT lists nothing for them
142my $d7ff  = ucfirst "\x{D7FF}";
143my $d800  = ucfirst "\x{D800}";
144my $dfff  = ucfirst "\x{DFFF}";
145my $e000  = ucfirst "\x{E000}";
146my $feff  = ucfirst "\x{FEFF}";
147my $fffd  = ucfirst "\x{FFFD}";
148my $fffe  = ucfirst "\x{FFFE}";
149my $ffff  = ucfirst "\x{FFFF}";
150my $hex4  = ucfirst "\x{10000}";
151my $hex5  = ucfirst "\x{100000}";
152my $maxm1 = ucfirst "\x{10FFFE}";
153my $max   = ucfirst "\x{10FFFF}";
154my $nonUnicode =  ucfirst "\x{110000}";
155EXPECT
156Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3.
157Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
158Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14.
159########
160# NAME Matching \p{} against above-Unicode
161use warnings 'utf8';
162chr(0xD7FF) =~ /\p{Any}/;
163chr(0xD800) =~ /\p{Any}/;
164chr(0xDFFF) =~ /\p{Any}/;
165chr(0xE000) =~ /\p{Any}/;
166chr(0xFEFF) =~ /\p{Any}/;
167chr(0xFFFD) =~ /\p{Any}/;
168chr(0xFFFE) =~ /\p{Any}/;
169chr(0xFFFF) =~ /\p{Any}/;
170chr(0x10000) =~ /\p{Any}/;
171chr(0x100000) =~ /\p{Any}/;
172chr(0x10FFFE) =~ /\p{Any}/;
173chr(0x10FFFF) =~ /\p{Any}/;
174chr(0x110000) =~ /[\p{Any}]/;
175chr(0x110001) =~ /[\w\p{Any}]/;
176chr(0x10FFFF) =~ /\p{All}/;
177chr(0x110002) =~ /[\w\p{All}]/;
178chr(0x110003) =~ /[\p{XPosixWord}]/;
179chr(0x110004) =~ /[\P{XPosixWord}]/;
180chr(0x110005) =~ /^[\p{Unassigned}]/;  # program line 20 (the NAME line is not counted): warns per EXPECT
181chr(0x110006) =~ /^[\P{Unassigned}]/;  # program line 21: warns per EXPECT
182# Only Unicode properties give non-Unicode warnings, and only those properties
183# which do match above Unicode; and not when something else in the class
184# matches above Unicode.  Below we test three ways where something outside the
185# property may match non-Unicode: a code point above it, a class \S that we
186# know at compile time doesn't, and a class \W whose values aren't (at the time
187# of this writing) specified at compile time, but which wouldn't match.
188chr(0x110050) =~ /\w/;
189chr(0x110051) =~ /\W/;
190chr(0x110052) =~ /\d/;
191chr(0x110053) =~ /\D/;
192chr(0x110054) =~ /\s/;
193chr(0x110055) =~ /\S/;
194chr(0x110056) =~ /[[:word:]]/;
195chr(0x110057) =~ /[[:^word:]]/;
196chr(0x110058) =~ /[[:alnum:]]/;
197chr(0x110059) =~ /[[:^alnum:]]/;
198chr(0x11005A) =~ /[[:space:]]/;
199chr(0x11005B) =~ /[[:^space:]]/;
200chr(0x11005C) =~ /[[:digit:]]/;
201chr(0x11005D) =~ /[[:^digit:]]/;
202chr(0x11005E) =~ /[[:alpha:]]/;
203chr(0x11005F) =~ /[[:^alpha:]]/;
204chr(0x110060) =~ /[[:ascii:]]/;
205chr(0x110061) =~ /[[:^ascii:]]/;
206chr(0x110062) =~ /[[:cntrl:]]/;
207chr(0x110063) =~ /[[:^cntrl:]]/;
208chr(0x110064) =~ /[[:graph:]]/;
209chr(0x110065) =~ /[[:^graph:]]/;
210chr(0x110066) =~ /[[:lower:]]/;
211chr(0x110067) =~ /[[:^lower:]]/;
212chr(0x110068) =~ /[[:print:]]/;
213chr(0x110069) =~ /[[:^print:]]/;
214chr(0x11006A) =~ /[[:punct:]]/;
215chr(0x11006B) =~ /[[:^punct:]]/;
216chr(0x11006C) =~ /[[:upper:]]/;
217chr(0x11006D) =~ /[[:^upper:]]/;
218chr(0x11006E) =~ /[[:xdigit:]]/;
219chr(0x11006F) =~ /[[:^xdigit:]]/;
220chr(0x110070) =~ /[[:blank:]]/;
221chr(0x110071) =~ /[[:^blank:]]/;
222chr(0x111010) =~ /[\W\p{Unassigned}]/;
223chr(0x111011) =~ /[\W\P{Unassigned}]/;
224chr(0x112010) =~ /[\S\p{Unassigned}]/;
225chr(0x112011) =~ /[\S\P{Unassigned}]/;
226chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
227chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
228no warnings 'utf8';  # every match above is repeated below; EXPECT lists nothing for the repeats
229chr(0xD7FF) =~ /\p{Any}/;
230chr(0xD800) =~ /\p{Any}/;
231chr(0xDFFF) =~ /\p{Any}/;
232chr(0xE000) =~ /\p{Any}/;
233chr(0xFEFF) =~ /\p{Any}/;
234chr(0xFFFD) =~ /\p{Any}/;
235chr(0xFFFE) =~ /\p{Any}/;
236chr(0xFFFF) =~ /\p{Any}/;
237chr(0x10000) =~ /\p{Any}/;
238chr(0x100000) =~ /\p{Any}/;
239chr(0x10FFFE) =~ /\p{Any}/;
240chr(0x10FFFF) =~ /\p{Any}/;
241chr(0x110000) =~ /[\p{Any}]/;
242chr(0x110001) =~ /[\w\p{Any}]/;
243chr(0x10FFFF) =~ /\p{All}/;
244chr(0x110002) =~ /[\w\p{All}]/;
245chr(0x110003) =~ /[\p{XPosixWord}]/;
246chr(0x110004) =~ /[\P{XPosixWord}]/;
247chr(0x110005) =~ /^[\p{Unassigned}]/;
248chr(0x110006) =~ /^[\P{Unassigned}]/;
249chr(0x110050) =~ /\w/;
250chr(0x110051) =~ /\W/;
251chr(0x110052) =~ /\d/;
252chr(0x110053) =~ /\D/;
253chr(0x110054) =~ /\s/;
254chr(0x110055) =~ /\S/;
255chr(0x110056) =~ /[[:word:]]/;
256chr(0x110057) =~ /[[:^word:]]/;
257chr(0x110058) =~ /[[:alnum:]]/;
258chr(0x110059) =~ /[[:^alnum:]]/;
259chr(0x11005A) =~ /[[:space:]]/;
260chr(0x11005B) =~ /[[:^space:]]/;
261chr(0x11005C) =~ /[[:digit:]]/;
262chr(0x11005D) =~ /[[:^digit:]]/;
263chr(0x11005E) =~ /[[:alpha:]]/;
264chr(0x11005F) =~ /[[:^alpha:]]/;
265chr(0x110060) =~ /[[:ascii:]]/;
266chr(0x110061) =~ /[[:^ascii:]]/;
267chr(0x110062) =~ /[[:cntrl:]]/;
268chr(0x110063) =~ /[[:^cntrl:]]/;
269chr(0x110064) =~ /[[:graph:]]/;
270chr(0x110065) =~ /[[:^graph:]]/;
271chr(0x110066) =~ /[[:lower:]]/;
272chr(0x110067) =~ /[[:^lower:]]/;
273chr(0x110068) =~ /[[:print:]]/;
274chr(0x110069) =~ /[[:^print:]]/;
275chr(0x11006A) =~ /[[:punct:]]/;
276chr(0x11006B) =~ /[[:^punct:]]/;
277chr(0x11006C) =~ /[[:upper:]]/;
278chr(0x11006D) =~ /[[:^upper:]]/;
279chr(0x11006E) =~ /[[:xdigit:]]/;
280chr(0x11006F) =~ /[[:^xdigit:]]/;
281chr(0x110070) =~ /[[:blank:]]/;
282chr(0x110071) =~ /[[:^blank:]]/;
283chr(0x111010) =~ /[\W\p{Unassigned}]/;
284chr(0x111011) =~ /[\W\P{Unassigned}]/;
285chr(0x112010) =~ /[\S\p{Unassigned}]/;
286chr(0x112011) =~ /[\S\P{Unassigned}]/;
287chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
288chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
289EXPECT
290Matched non-Unicode code point 0x110005 against Unicode property; may not be portable at - line 20.
291Matched non-Unicode code point 0x110006 against Unicode property; may not be portable at - line 21.
292########
293# NAME Matching Unicode property against above-Unicode code point outputs a warning even if optimizer rejects the match (in synthetic start class)
294# Now have to make FATAL to guarantee being output
295use warnings FATAL => 'non_unicode';
296"\x{110000}" =~ /b?\p{Space}/;  # program line 3 (the NAME line is not counted): the position EXPECT reports
297EXPECT
298Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
299########
300# NAME Matching POSIX class property against above-Unicode code point doesn't output a warning
301use warnings 'non_unicode';
302use warnings FATAL => 'non_unicode';
303"\x{110000}" =~ /b?[[:space:]]/;  # POSIX class instead of \p{}: EXPECT is empty even with the category fatal
304EXPECT
305########
# NAME \p{Any} matches above-Unicode code point without any warning
306use warnings 'utf8';
307chr(0x110000) =~ /\p{Any}/;  # no output expected, hence the empty EXPECT section below
EXPECT
308########
309# NAME utf8, non_unicode warnings categories work on Matched non-Unicode code point warning
310use warnings qw(utf8 non_unicode);
311chr(0x110000) =~ /^\p{Unassigned}/;  # program line 2: the only warning listed in EXPECT
312no warnings 'non_unicode';
313chr(0x110001) =~ /\p{Unassigned}/;  # absent from EXPECT: 'non_unicode' is off here
314use warnings 'non_unicode';
315no warnings 'utf8';  # NOTE(review): presumably this also switches 'non_unicode' back off, as a sub-category of 'utf8'
316chr(0x110002) =~ /\p{Unassigned}/;  # absent from EXPECT
317EXPECT
318Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 2.
319########
320# NAME optimizable regnode should still give non_unicode warnings when fatalized
321use warnings 'utf8';
322use warnings FATAL => 'non_unicode';
323chr(0x110000) =~ /\p{lb=cr}/;  # program line 3 (the NAME line is not counted): the position EXPECT reports
324EXPECT
325Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
326########
327# NAME optimizable regnode should not give non_unicode warnings when warnings are off
328no warnings 'non_unicode';
329chr(0x110000) =~ /\p{lb=cr}/;  # same match as the fatalized variant of this test; EXPECT is empty
330EXPECT
331########
332# NAME 'All' matches above-Unicode without any warning
333use warnings qw(utf8 non_unicode);
334chr(0x110000) =~ /\p{All}/;  # both categories enabled, yet EXPECT is empty
335EXPECT
336########
# NAME user-defined property in a bracketed class matches above-Unicode code point without warning
337require "../test.pl";
338use warnings 'utf8';
339sub Is_Super { return '!utf8::Any' }  # user-defined property; NOTE(review): the leading '!' presumably means the complement of utf8::Any
340# The extra char is to avoid an optimization that avoids the problem when the
341# property is the only non-latin1 char in a class
342print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n";  # EXPECT is just "1": the match succeeds and no warning text is produced
343EXPECT
3441
345########
# NAME printing to a :utf8 handle warns for surrogates, non-characters and above-Unicode code points
346require "../test.pl";
347use warnings 'utf8';
348my $file = tempfile();  # tempfile() is not defined in this program; presumably supplied by ../test.pl
349open(my $fh, "+>:utf8", $file);  # every print below goes through the :utf8 layer
350print $fh "\x{D7FF}", "\n";
351print $fh "\x{D800}", "\n";  # program line 6: surrogate warning in EXPECT
352print $fh "\x{DFFF}", "\n";  # program line 7: surrogate warning in EXPECT
353print $fh "\x{E000}", "\n";
354print $fh "\x{FDCF}", "\n";
355print $fh "\x{FDD0}", "\n";  # program line 10: non-character warning in EXPECT
356print $fh "\x{FDEF}", "\n";  # program line 11: non-character warning in EXPECT
357print $fh "\x{FDF0}", "\n";
358print $fh "\x{FEFF}", "\n";
359print $fh "\x{FFFD}", "\n";
360print $fh "\x{FFFE}", "\n";  # from here on every U+xFFFE / U+xFFFF line has a non-character warning in EXPECT
361print $fh "\x{FFFF}", "\n";
362print $fh "\x{10000}", "\n";
363print $fh "\x{1FFFE}", "\n";
364print $fh "\x{1FFFF}", "\n";
365print $fh "\x{2FFFE}", "\n";
366print $fh "\x{2FFFF}", "\n";
367print $fh "\x{3FFFE}", "\n";
368print $fh "\x{3FFFF}", "\n";
369print $fh "\x{4FFFE}", "\n";
370print $fh "\x{4FFFF}", "\n";
371print $fh "\x{5FFFE}", "\n";
372print $fh "\x{5FFFF}", "\n";
373print $fh "\x{6FFFE}", "\n";
374print $fh "\x{6FFFF}", "\n";
375print $fh "\x{7FFFE}", "\n";
376print $fh "\x{7FFFF}", "\n";
377print $fh "\x{8FFFE}", "\n";
378print $fh "\x{8FFFF}", "\n";
379print $fh "\x{9FFFE}", "\n";
380print $fh "\x{9FFFF}", "\n";
381print $fh "\x{AFFFE}", "\n";
382print $fh "\x{AFFFF}", "\n";
383print $fh "\x{BFFFE}", "\n";
384print $fh "\x{BFFFF}", "\n";
385print $fh "\x{CFFFE}", "\n";
386print $fh "\x{CFFFF}", "\n";
387print $fh "\x{DFFFE}", "\n";
388print $fh "\x{DFFFF}", "\n";
389print $fh "\x{EFFFE}", "\n";
390print $fh "\x{EFFFF}", "\n";
391print $fh "\x{FFFFE}", "\n";
392print $fh "\x{FFFFF}", "\n";
393print $fh "\x{100000}", "\n";
394print $fh "\x{10FFFE}", "\n";
395print $fh "\x{10FFFF}", "\n";
396print $fh "\x{110000}", "\n";  # program line 51: the "not Unicode" warning in EXPECT
397close $fh;
398EXPECT
399Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
400Unicode surrogate U+DFFF is illegal in UTF-8 at - line 7.
401Unicode non-character U+FDD0 is illegal for open interchange at - line 10.
402Unicode non-character U+FDEF is illegal for open interchange at - line 11.
403Unicode non-character U+FFFE is illegal for open interchange at - line 15.
404Unicode non-character U+FFFF is illegal for open interchange at - line 16.
405Unicode non-character U+1FFFE is illegal for open interchange at - line 18.
406Unicode non-character U+1FFFF is illegal for open interchange at - line 19.
407Unicode non-character U+2FFFE is illegal for open interchange at - line 20.
408Unicode non-character U+2FFFF is illegal for open interchange at - line 21.
409Unicode non-character U+3FFFE is illegal for open interchange at - line 22.
410Unicode non-character U+3FFFF is illegal for open interchange at - line 23.
411Unicode non-character U+4FFFE is illegal for open interchange at - line 24.
412Unicode non-character U+4FFFF is illegal for open interchange at - line 25.
413Unicode non-character U+5FFFE is illegal for open interchange at - line 26.
414Unicode non-character U+5FFFF is illegal for open interchange at - line 27.
415Unicode non-character U+6FFFE is illegal for open interchange at - line 28.
416Unicode non-character U+6FFFF is illegal for open interchange at - line 29.
417Unicode non-character U+7FFFE is illegal for open interchange at - line 30.
418Unicode non-character U+7FFFF is illegal for open interchange at - line 31.
419Unicode non-character U+8FFFE is illegal for open interchange at - line 32.
420Unicode non-character U+8FFFF is illegal for open interchange at - line 33.
421Unicode non-character U+9FFFE is illegal for open interchange at - line 34.
422Unicode non-character U+9FFFF is illegal for open interchange at - line 35.
423Unicode non-character U+AFFFE is illegal for open interchange at - line 36.
424Unicode non-character U+AFFFF is illegal for open interchange at - line 37.
425Unicode non-character U+BFFFE is illegal for open interchange at - line 38.
426Unicode non-character U+BFFFF is illegal for open interchange at - line 39.
427Unicode non-character U+CFFFE is illegal for open interchange at - line 40.
428Unicode non-character U+CFFFF is illegal for open interchange at - line 41.
429Unicode non-character U+DFFFE is illegal for open interchange at - line 42.
430Unicode non-character U+DFFFF is illegal for open interchange at - line 43.
431Unicode non-character U+EFFFE is illegal for open interchange at - line 44.
432Unicode non-character U+EFFFF is illegal for open interchange at - line 45.
433Unicode non-character U+FFFFE is illegal for open interchange at - line 46.
434Unicode non-character U+FFFFF is illegal for open interchange at - line 47.
435Unicode non-character U+10FFFE is illegal for open interchange at - line 49.
436Unicode non-character U+10FFFF is illegal for open interchange at - line 50.
437Code point 0x110000 is not Unicode, may not be portable at - line 51.
438########
# NAME printing to a :utf8 handle under 'utf8' warnings: one warning per sub-category
439require "../test.pl";
440use warnings 'utf8';
441my $file = tempfile();  # tempfile() is not defined in this program; presumably supplied by ../test.pl
442open(my $fh, "+>:utf8", $file);
443print $fh "\x{D800}", "\n";  # program line 5: surrogate warning
444print $fh "\x{FFFF}", "\n";  # program line 6: non-character warning
445print $fh "\x{110000}", "\n";  # program line 7: non-Unicode warning
446close $fh;
447EXPECT
448Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
449Unicode non-character U+FFFF is illegal for open interchange at - line 6.
450Code point 0x110000 is not Unicode, may not be portable at - line 7.
451########
# NAME printing to a :utf8 handle: no warnings 'surrogate' silences only the surrogate warning
452require "../test.pl";
453use warnings 'utf8';
454no warnings 'surrogate';
455my $file = tempfile();  # tempfile() is not defined in this program; presumably supplied by ../test.pl
456open(my $fh, "+>:utf8", $file);
457print $fh "\x{D800}", "\n";  # absent from EXPECT
458print $fh "\x{FFFF}", "\n";  # program line 7: still warns
459print $fh "\x{110000}", "\n";  # program line 8: still warns
460close $fh;
461EXPECT
462Unicode non-character U+FFFF is illegal for open interchange at - line 7.
463Code point 0x110000 is not Unicode, may not be portable at - line 8.
464########
# NAME printing to a :utf8 handle: no warnings 'nonchar' silences only the non-character warning
465require "../test.pl";
466use warnings 'utf8';
467no warnings 'nonchar';
468my $file = tempfile();  # tempfile() is not defined in this program; presumably supplied by ../test.pl
469open(my $fh, "+>:utf8", $file);
470print $fh "\x{D800}", "\n";  # program line 6: still warns
471print $fh "\x{FFFF}", "\n";  # absent from EXPECT
472print $fh "\x{110000}", "\n";  # program line 8: still warns
473close $fh;
474EXPECT
475Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
476Code point 0x110000 is not Unicode, may not be portable at - line 8.
477########
# NAME printing to a :utf8 handle: no warnings 'non_unicode' silences only the non-Unicode warning
478require "../test.pl";
479use warnings 'utf8';
480no warnings 'non_unicode';
481my $file = tempfile();  # tempfile() is not defined in this program; presumably supplied by ../test.pl
482open(my $fh, "+>:utf8", $file);
483print $fh "\x{D800}", "\n";  # program line 6: still warns
484print $fh "\x{FFFF}", "\n";  # program line 7: still warns
485print $fh "\x{110000}", "\n";  # absent from EXPECT
486close $fh;
487EXPECT
488Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
489Unicode non-character U+FFFF is illegal for open interchange at - line 7.
490########
491# NAME C<use warnings "nonchar"> works in isolation
492require "../test.pl";
493use warnings 'nonchar';  # only this sub-category is enabled; 'utf8' as a whole is not
494my $file = tempfile();  # tempfile() is not defined in this program; presumably supplied by ../test.pl
495open(my $fh, "+>:utf8", $file);
496print $fh "\x{FFFF}", "\n";  # program line 5 (the NAME line is not counted): warns per EXPECT
497close $fh;
498EXPECT
499Unicode non-character U+FFFF is illegal for open interchange at - line 5.
500########
501# NAME C<use warnings "surrogate"> works in isolation
502require "../test.pl";
503use warnings 'surrogate';  # only this sub-category is enabled; 'utf8' as a whole is not
504my $file = tempfile();  # tempfile() is not defined in this program; presumably supplied by ../test.pl
505open(my $fh, "+>:utf8", $file);
506print $fh "\x{D800}", "\n";  # program line 5 (the NAME line is not counted): warns per EXPECT
507close $fh;
508EXPECT
509Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
510########
511# NAME C<use warnings "non_unicode"> works in isolation
512require "../test.pl";
513use warnings 'non_unicode';  # only this sub-category is enabled; 'utf8' as a whole is not
514my $file = tempfile();  # tempfile() is not defined in this program; presumably supplied by ../test.pl
515open(my $fh, "+>:utf8", $file);
516print $fh "\x{110000}", "\n";  # program line 5 (the NAME line is not counted): warns per EXPECT
517close $fh;
518EXPECT
519Code point 0x110000 is not Unicode, may not be portable at - line 5.
520########
# NAME printing surrogates, non-characters and above-Unicode code points to a :utf8 handle under no warnings 'utf8'
521require "../test.pl";
522no warnings 'utf8';  # the whole category is disabled for every print below
523my $file = tempfile();  # tempfile() is not defined in this program; presumably supplied by ../test.pl
524open(my $fh, "+>:utf8", $file);
525print $fh "\x{D7FF}", "\n";
526print $fh "\x{D800}", "\n";
527print $fh "\x{DFFF}", "\n";
528print $fh "\x{E000}", "\n";
529print $fh "\x{FDCF}", "\n";
530print $fh "\x{FDD0}", "\n";
531print $fh "\x{FDEF}", "\n";
532print $fh "\x{FDF0}", "\n";
533print $fh "\x{FEFF}", "\n";
534print $fh "\x{FFFD}", "\n";
535print $fh "\x{FFFE}", "\n";
536print $fh "\x{FFFF}", "\n";
537print $fh "\x{10000}", "\n";
538print $fh "\x{1FFFE}", "\n";
539print $fh "\x{1FFFF}", "\n";
540print $fh "\x{2FFFE}", "\n";
541print $fh "\x{2FFFF}", "\n";
542print $fh "\x{3FFFE}", "\n";
543print $fh "\x{3FFFF}", "\n";
544print $fh "\x{4FFFE}", "\n";
545print $fh "\x{4FFFF}", "\n";
546print $fh "\x{5FFFE}", "\n";
547print $fh "\x{5FFFF}", "\n";
548print $fh "\x{6FFFE}", "\n";
549print $fh "\x{6FFFF}", "\n";
550print $fh "\x{7FFFE}", "\n";
551print $fh "\x{7FFFF}", "\n";
552print $fh "\x{8FFFE}", "\n";
553print $fh "\x{8FFFF}", "\n";
554print $fh "\x{9FFFE}", "\n";
555print $fh "\x{9FFFF}", "\n";
556print $fh "\x{AFFFE}", "\n";
557print $fh "\x{AFFFF}", "\n";
558print $fh "\x{BFFFE}", "\n";
559print $fh "\x{BFFFF}", "\n";
560print $fh "\x{CFFFE}", "\n";
561print $fh "\x{CFFFF}", "\n";
562print $fh "\x{DFFFE}", "\n";
563print $fh "\x{DFFFF}", "\n";
564print $fh "\x{EFFFE}", "\n";
565print $fh "\x{EFFFF}", "\n";
566print $fh "\x{FFFFE}", "\n";
567print $fh "\x{FFFFF}", "\n";
568print $fh "\x{100000}", "\n";
569print $fh "\x{10FFFE}", "\n";
570print $fh "\x{10FFFF}", "\n";
571print $fh "\x{110000}", "\n";
572close $fh;
573EXPECT
574