utf8.c AOK

     [utf8_to_uvchr_buf]
     Malformed UTF-8 character
        my $a = ord "\x80" ;

     Malformed UTF-8 character
        my $a = ord "\xf080" ;
     <<<<<< this warning can't be easily triggered from perl anymore

     [utf16_to_utf8]
     Malformed UTF-16 surrogate
     <<<<<< Add a test when something actually calls utf16_to_utf8

__END__
# NAME Malformed UTF-8 in a string literal under 'use utf8'
# utf8.c [utf8_to_uvchr_buf] -W
BEGIN {
    if (ord('A') == 193) {
        print "SKIPPED\n# ebcdic platforms do not generate Malformed UTF-8 warnings.";
        exit 0;
    }
}
use utf8 ; # NOTE(review): the "sn?storm" literals must hold the raw byte 0xF8 named in EXPECT - confirm this file is stored as bytes, not re-encoded
my $a = "sn�storm" ;
{
    no warnings 'utf8' ;
    my $a = "sn�storm";
    use warnings 'utf8' ;
    my $a = "sn�storm";
}
EXPECT
Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9.
Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14.
########
# NAME uc() of surrogates and above-Unicode code points
use warnings 'utf8';
my $d7ff = uc(chr(0xD7FF));
my $d800 = uc(chr(0xD800));
my $dfff = uc(chr(0xDFFF));
my $e000 = uc(chr(0xE000));
my $feff = uc(chr(0xFEFF));
my $fffd = uc(chr(0xFFFD));
my $fffe = uc(chr(0xFFFE));
my $ffff = uc(chr(0xFFFF));
my $hex4 = uc(chr(0x10000));
my $hex5 = uc(chr(0x100000));
my $maxm1 = uc(chr(0x10FFFE));
my $max = uc(chr(0x10FFFF));
my $nonUnicode = uc(chr(0x110000));
no warnings 'utf8';
my $d7ff = uc(chr(0xD7FF));
my $d800 = uc(chr(0xD800));
my $dfff = uc(chr(0xDFFF));
my $e000 = uc(chr(0xE000));
my $feff = uc(chr(0xFEFF));
my $fffd = uc(chr(0xFFFD));
my $fffe = uc(chr(0xFFFE));
my $ffff = uc(chr(0xFFFF));
my $hex4 = uc(chr(0x10000));
my $hex5 = uc(chr(0x100000));
my $maxm1 = uc(chr(0x10FFFE));
my $max = uc(chr(0x10FFFF));
my $nonUnicode = uc(chr(0x110000));
EXPECT
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14.
########
# NAME uc() with 'surrogate' warnings disabled
use warnings 'utf8';
my $d800 = uc(chr(0xD800));
my $nonUnicode = uc(chr(0x110000));
no warnings 'surrogate';
my $d800 = uc(chr(0xD800));
my $nonUnicode = uc(chr(0x110000));
EXPECT
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6.
########
# NAME uc() with 'non_unicode' warnings disabled
use warnings 'utf8';
my $d800 = uc(chr(0xD800));
my $nonUnicode = uc(chr(0x110000));
my $big_nonUnicode = uc(chr(0x8000_0000));
no warnings 'non_unicode';
my $d800 = uc(chr(0xD800));
my $nonUnicode = uc(chr(0x110000));
my $big_nonUnicode = uc(chr(0x8000_0000));
EXPECT
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 4.
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 6.
########
# NAME lc() of surrogates and above-Unicode code points
use warnings 'utf8';
my $d7ff = lc pack("U", 0xD7FF);
my $d800 = lc pack("U", 0xD800);
my $dfff = lc pack("U", 0xDFFF);
my $e000 = lc pack("U", 0xE000);
my $feff = lc pack("U", 0xFEFF);
my $fffd = lc pack("U", 0xFFFD);
my $fffe = lc pack("U", 0xFFFE);
my $ffff = lc pack("U", 0xFFFF);
my $hex4 = lc pack("U", 0x10000);
my $hex5 = lc pack("U", 0x100000);
my $maxm1 = lc pack("U", 0x10FFFE);
my $max = lc pack("U", 0x10FFFF);
my $nonUnicode = lc(pack("U", 0x110000));
no warnings 'utf8';
my $d7ff = lc pack("U", 0xD7FF);
my $d800 = lc pack("U", 0xD800);
my $dfff = lc pack("U", 0xDFFF);
my $e000 = lc pack("U", 0xE000);
my $feff = lc pack("U", 0xFEFF);
my $fffd = lc pack("U", 0xFFFD);
my $fffe = lc pack("U", 0xFFFE);
my $ffff = lc pack("U", 0xFFFF);
my $hex4 = lc pack("U", 0x10000);
my $hex5 = lc pack("U", 0x100000);
my $maxm1 = lc pack("U", 0x10FFFE);
my $max = lc pack("U", 0x10FFFF);
my $nonUnicode = lc(pack("U", 0x110000));
EXPECT
Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14.
########
# NAME ucfirst() of surrogates and above-Unicode code points
use warnings 'utf8';
my $d7ff = ucfirst "\x{D7FF}";
my $d800 = ucfirst "\x{D800}";
my $dfff = ucfirst "\x{DFFF}";
my $e000 = ucfirst "\x{E000}";
my $feff = ucfirst "\x{FEFF}";
my $fffd = ucfirst "\x{FFFD}";
my $fffe = ucfirst "\x{FFFE}";
my $ffff = ucfirst "\x{FFFF}";
my $hex4 = ucfirst "\x{10000}";
my $hex5 = ucfirst "\x{100000}";
my $maxm1 = ucfirst "\x{10FFFE}";
my $max = ucfirst "\x{10FFFF}";
my $nonUnicode = ucfirst "\x{110000}";
no warnings 'utf8';
my $d7ff = ucfirst "\x{D7FF}";
my $d800 = ucfirst "\x{D800}";
my $dfff = ucfirst "\x{DFFF}";
my $e000 = ucfirst "\x{E000}";
my $feff = ucfirst "\x{FEFF}";
my $fffd = ucfirst "\x{FFFD}";
my $fffe = ucfirst "\x{FFFE}";
my $ffff = ucfirst "\x{FFFF}";
my $hex4 = ucfirst "\x{10000}";
my $hex5 = ucfirst "\x{100000}";
my $maxm1 = ucfirst "\x{10FFFE}";
my $max = ucfirst "\x{10FFFF}";
my $nonUnicode = ucfirst "\x{110000}";
EXPECT
Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3.
Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14.
########
# NAME Matching \p{} against above-Unicode
use warnings 'utf8'; # first pass, warnings on: EXPECT lists output for lines 20 and 21 only
chr(0xD7FF) =~ /\p{Any}/;
chr(0xD800) =~ /\p{Any}/;
chr(0xDFFF) =~ /\p{Any}/;
chr(0xE000) =~ /\p{Any}/;
chr(0xFEFF) =~ /\p{Any}/;
chr(0xFFFD) =~ /\p{Any}/;
chr(0xFFFE) =~ /\p{Any}/;
chr(0xFFFF) =~ /\p{Any}/;
chr(0x10000) =~ /\p{Any}/;
chr(0x100000) =~ /\p{Any}/;
chr(0x10FFFE) =~ /\p{Any}/;
chr(0x10FFFF) =~ /\p{Any}/;
chr(0x110000) =~ /[\p{Any}]/;
chr(0x110001) =~ /[\w\p{Any}]/;
chr(0x10FFFF) =~ /\p{All}/;
chr(0x110002) =~ /[\w\p{All}]/;
chr(0x110003) =~ /[\p{XPosixWord}]/;
chr(0x110004) =~ /[\P{XPosixWord}]/;
chr(0x110005) =~ /^[\p{Unassigned}]/; # expected to warn (EXPECT: line 20)
chr(0x110006) =~ /^[\P{Unassigned}]/; # expected to warn (EXPECT: line 21)
# Only Unicode properties give non-Unicode warnings, and only those properties
# which do match above Unicode; and not when something else in the class
# matches above Unicode.  Below we test three ways where something outside the
# property may match non-Unicode: a code point above it, a class \S that we
# know at compile time doesn't, and a class \W whose values aren't (at the time
# of this writing) specified at compile time, but which wouldn't match
chr(0x110050) =~ /\w/;
chr(0x110051) =~ /\W/;
chr(0x110052) =~ /\d/;
chr(0x110053) =~ /\D/;
chr(0x110054) =~ /\s/;
chr(0x110055) =~ /\S/;
chr(0x110056) =~ /[[:word:]]/;
chr(0x110057) =~ /[[:^word:]]/;
chr(0x110058) =~ /[[:alnum:]]/;
chr(0x110059) =~ /[[:^alnum:]]/;
chr(0x11005A) =~ /[[:space:]]/;
chr(0x11005B) =~ /[[:^space:]]/;
chr(0x11005C) =~ /[[:digit:]]/;
chr(0x11005D) =~ /[[:^digit:]]/;
chr(0x11005E) =~ /[[:alpha:]]/;
chr(0x11005F) =~ /[[:^alpha:]]/;
chr(0x110060) =~ /[[:ascii:]]/;
chr(0x110061) =~ /[[:^ascii:]]/;
chr(0x110062) =~ /[[:cntrl:]]/;
chr(0x110063) =~ /[[:^cntrl:]]/;
chr(0x110064) =~ /[[:graph:]]/;
chr(0x110065) =~ /[[:^graph:]]/;
chr(0x110066) =~ /[[:lower:]]/;
chr(0x110067) =~ /[[:^lower:]]/;
chr(0x110068) =~ /[[:print:]]/;
chr(0x110069) =~ /[[:^print:]]/;
chr(0x11006A) =~ /[[:punct:]]/;
chr(0x11006B) =~ /[[:^punct:]]/;
chr(0x11006C) =~ /[[:upper:]]/;
chr(0x11006D) =~ /[[:^upper:]]/;
chr(0x11006E) =~ /[[:xdigit:]]/;
chr(0x11006F) =~ /[[:^xdigit:]]/;
chr(0x110070) =~ /[[:blank:]]/;
chr(0x110071) =~ /[[:^blank:]]/;
chr(0x111010) =~ /[\W\p{Unassigned}]/;
chr(0x111011) =~ /[\W\P{Unassigned}]/;
chr(0x112010) =~ /[\S\p{Unassigned}]/;
chr(0x112011) =~ /[\S\P{Unassigned}]/;
chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
no warnings 'utf8'; # second pass: the same matches again; EXPECT lists no output for any of them
chr(0xD7FF) =~ /\p{Any}/;
chr(0xD800) =~ /\p{Any}/;
chr(0xDFFF) =~ /\p{Any}/;
chr(0xE000) =~ /\p{Any}/;
chr(0xFEFF) =~ /\p{Any}/;
chr(0xFFFD) =~ /\p{Any}/;
chr(0xFFFE) =~ /\p{Any}/;
chr(0xFFFF) =~ /\p{Any}/;
chr(0x10000) =~ /\p{Any}/;
chr(0x100000) =~ /\p{Any}/;
chr(0x10FFFE) =~ /\p{Any}/;
chr(0x10FFFF) =~ /\p{Any}/;
chr(0x110000) =~ /[\p{Any}]/;
chr(0x110001) =~ /[\w\p{Any}]/;
chr(0x10FFFF) =~ /\p{All}/;
chr(0x110002) =~ /[\w\p{All}]/;
chr(0x110003) =~ /[\p{XPosixWord}]/;
chr(0x110004) =~ /[\P{XPosixWord}]/;
chr(0x110005) =~ /^[\p{Unassigned}]/;
chr(0x110006) =~ /^[\P{Unassigned}]/;
chr(0x110050) =~ /\w/;
chr(0x110051) =~ /\W/;
chr(0x110052) =~ /\d/;
chr(0x110053) =~ /\D/;
chr(0x110054) =~ /\s/;
chr(0x110055) =~ /\S/;
chr(0x110056) =~ /[[:word:]]/;
chr(0x110057) =~ /[[:^word:]]/;
chr(0x110058) =~ /[[:alnum:]]/;
chr(0x110059) =~ /[[:^alnum:]]/;
chr(0x11005A) =~ /[[:space:]]/;
chr(0x11005B) =~ /[[:^space:]]/;
chr(0x11005C) =~ /[[:digit:]]/;
chr(0x11005D) =~ /[[:^digit:]]/;
chr(0x11005E) =~ /[[:alpha:]]/;
chr(0x11005F) =~ /[[:^alpha:]]/;
chr(0x110060) =~ /[[:ascii:]]/;
chr(0x110061) =~ /[[:^ascii:]]/;
chr(0x110062) =~ /[[:cntrl:]]/;
chr(0x110063) =~ /[[:^cntrl:]]/;
chr(0x110064) =~ /[[:graph:]]/;
chr(0x110065) =~ /[[:^graph:]]/;
chr(0x110066) =~ /[[:lower:]]/;
chr(0x110067) =~ /[[:^lower:]]/;
chr(0x110068) =~ /[[:print:]]/;
chr(0x110069) =~ /[[:^print:]]/;
chr(0x11006A) =~ /[[:punct:]]/;
chr(0x11006B) =~ /[[:^punct:]]/;
chr(0x11006C) =~ /[[:upper:]]/;
chr(0x11006D) =~ /[[:^upper:]]/;
chr(0x11006E) =~ /[[:xdigit:]]/;
chr(0x11006F) =~ /[[:^xdigit:]]/;
chr(0x110070) =~ /[[:blank:]]/;
chr(0x110071) =~ /[[:^blank:]]/;
chr(0x111010) =~ /[\W\p{Unassigned}]/;
chr(0x111011) =~ /[\W\P{Unassigned}]/;
chr(0x112010) =~ /[\S\p{Unassigned}]/;
chr(0x112011) =~ /[\S\P{Unassigned}]/;
chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
EXPECT
Matched non-Unicode code point 0x110005 against Unicode property; may not be portable at - line 20.
Matched non-Unicode code point 0x110006 against Unicode property; may not be portable at - line 21.
########
# NAME Matching Unicode property against above-Unicode code point outputs a warning even if optimizer rejects the match (in synthetic start class)
# Now have to make FATAL to guarantee being output
use warnings FATAL => 'non_unicode';
"\x{110000}" =~ /b?\p{Space}/; # the fatalized warning is the only expected output (EXPECT: line 3)
EXPECT
Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
########
# NAME Matching POSIX class property against above-Unicode code point doesn't output a warning
use warnings 'non_unicode';
use warnings FATAL => 'non_unicode';
"\x{110000}" =~ /b?[[:space:]]/;
EXPECT
########
# NAME \p{Any} against above-Unicode code point gives no warning
use warnings 'utf8';
chr(0x110000) =~ /\p{Any}/;
EXPECT
########
# NAME utf8, non_unicode warnings categories work on Matched non-Unicode code point warning
use warnings qw(utf8 non_unicode);
chr(0x110000) =~ /^\p{Unassigned}/;
no warnings 'non_unicode';
chr(0x110001) =~ /\p{Unassigned}/;
use warnings 'non_unicode';
no warnings 'utf8';
chr(0x110002) =~ /\p{Unassigned}/;
EXPECT
Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 2.
########
# NAME optimizable regnode should still give non_unicode warnings when fatalized
use warnings 'utf8';
use warnings FATAL => 'non_unicode';
chr(0x110000) =~ /\p{lb=cr}/;
EXPECT
Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
########
# NAME optimizable regnode should not give non_unicode warnings when warnings are off
no warnings 'non_unicode';
chr(0x110000) =~ /\p{lb=cr}/;
EXPECT
########
# NAME 'All' matches above-Unicode without any warning
use warnings qw(utf8 non_unicode);
chr(0x110000) =~ /\p{All}/;
EXPECT
########
# NAME User-defined property matching an above-Unicode code point gives no warning
require "../test.pl";
use warnings 'utf8';
sub Is_Super { return '!utf8::Any' }
# The extra char is to avoid an optimization that avoids the problem when the
# property is the only non-latin1 char in a class
print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n";
EXPECT
1
########
# NAME print to a :utf8 handle warns for surrogates, non-characters and above-Unicode
require "../test.pl";
use warnings 'utf8';
my $file = tempfile();
open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; # fail loudly instead of printing to a closed handle
print $fh "\x{D7FF}", "\n";
print $fh "\x{D800}", "\n";
print $fh "\x{DFFF}", "\n";
print $fh "\x{E000}", "\n";
print $fh "\x{FDCF}", "\n";
print $fh "\x{FDD0}", "\n";
print $fh "\x{FDEF}", "\n";
print $fh "\x{FDF0}", "\n";
print $fh "\x{FEFF}", "\n";
print $fh "\x{FFFD}", "\n";
print $fh "\x{FFFE}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{10000}", "\n";
print $fh "\x{1FFFE}", "\n";
print $fh "\x{1FFFF}", "\n";
print $fh "\x{2FFFE}", "\n";
print $fh "\x{2FFFF}", "\n";
print $fh "\x{3FFFE}", "\n";
print $fh "\x{3FFFF}", "\n";
print $fh "\x{4FFFE}", "\n";
print $fh "\x{4FFFF}", "\n";
print $fh "\x{5FFFE}", "\n";
print $fh "\x{5FFFF}", "\n";
print $fh "\x{6FFFE}", "\n";
print $fh "\x{6FFFF}", "\n";
print $fh "\x{7FFFE}", "\n";
print $fh "\x{7FFFF}", "\n";
print $fh "\x{8FFFE}", "\n";
print $fh "\x{8FFFF}", "\n";
print $fh "\x{9FFFE}", "\n";
print $fh "\x{9FFFF}", "\n";
print $fh "\x{AFFFE}", "\n";
print $fh "\x{AFFFF}", "\n";
print $fh "\x{BFFFE}", "\n";
print $fh "\x{BFFFF}", "\n";
print $fh "\x{CFFFE}", "\n";
print $fh "\x{CFFFF}", "\n";
print $fh "\x{DFFFE}", "\n";
print $fh "\x{DFFFF}", "\n";
print $fh "\x{EFFFE}", "\n";
print $fh "\x{EFFFF}", "\n";
print $fh "\x{FFFFE}", "\n";
print $fh "\x{FFFFF}", "\n";
print $fh "\x{100000}", "\n";
print $fh "\x{10FFFE}", "\n";
print $fh "\x{10FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Unicode surrogate U+DFFF is illegal in UTF-8 at - line 7.
Unicode non-character U+FDD0 is illegal for open interchange at - line 10.
Unicode non-character U+FDEF is illegal for open interchange at - line 11.
Unicode non-character U+FFFE is illegal for open interchange at - line 15.
Unicode non-character U+FFFF is illegal for open interchange at - line 16.
Unicode non-character U+1FFFE is illegal for open interchange at - line 18.
Unicode non-character U+1FFFF is illegal for open interchange at - line 19.
Unicode non-character U+2FFFE is illegal for open interchange at - line 20.
Unicode non-character U+2FFFF is illegal for open interchange at - line 21.
Unicode non-character U+3FFFE is illegal for open interchange at - line 22.
Unicode non-character U+3FFFF is illegal for open interchange at - line 23.
Unicode non-character U+4FFFE is illegal for open interchange at - line 24.
Unicode non-character U+4FFFF is illegal for open interchange at - line 25.
Unicode non-character U+5FFFE is illegal for open interchange at - line 26.
Unicode non-character U+5FFFF is illegal for open interchange at - line 27.
Unicode non-character U+6FFFE is illegal for open interchange at - line 28.
Unicode non-character U+6FFFF is illegal for open interchange at - line 29.
Unicode non-character U+7FFFE is illegal for open interchange at - line 30.
Unicode non-character U+7FFFF is illegal for open interchange at - line 31.
Unicode non-character U+8FFFE is illegal for open interchange at - line 32.
Unicode non-character U+8FFFF is illegal for open interchange at - line 33.
Unicode non-character U+9FFFE is illegal for open interchange at - line 34.
Unicode non-character U+9FFFF is illegal for open interchange at - line 35.
Unicode non-character U+AFFFE is illegal for open interchange at - line 36.
Unicode non-character U+AFFFF is illegal for open interchange at - line 37.
Unicode non-character U+BFFFE is illegal for open interchange at - line 38.
Unicode non-character U+BFFFF is illegal for open interchange at - line 39.
Unicode non-character U+CFFFE is illegal for open interchange at - line 40.
Unicode non-character U+CFFFF is illegal for open interchange at - line 41.
Unicode non-character U+DFFFE is illegal for open interchange at - line 42.
Unicode non-character U+DFFFF is illegal for open interchange at - line 43.
Unicode non-character U+EFFFE is illegal for open interchange at - line 44.
Unicode non-character U+EFFFF is illegal for open interchange at - line 45.
Unicode non-character U+FFFFE is illegal for open interchange at - line 46.
Unicode non-character U+FFFFF is illegal for open interchange at - line 47.
Unicode non-character U+10FFFE is illegal for open interchange at - line 49.
Unicode non-character U+10FFFF is illegal for open interchange at - line 50.
Code point 0x110000 is not Unicode, may not be portable at - line 51.
########
# NAME print to a :utf8 handle: one warning from each of the three utf8 subcategories
require "../test.pl";
use warnings 'utf8';
my $file = tempfile();
open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; # fail loudly instead of printing to a closed handle
print $fh "\x{D800}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
Unicode non-character U+FFFF is illegal for open interchange at - line 6.
Code point 0x110000 is not Unicode, may not be portable at - line 7.
########
# NAME print to a :utf8 handle with 'surrogate' warnings disabled
require "../test.pl";
use warnings 'utf8';
no warnings 'surrogate';
my $file = tempfile();
open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; # fail loudly instead of printing to a closed handle
print $fh "\x{D800}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode non-character U+FFFF is illegal for open interchange at - line 7.
Code point 0x110000 is not Unicode, may not be portable at - line 8.
########
# NAME print to a :utf8 handle with 'nonchar' warnings disabled
require "../test.pl";
use warnings 'utf8';
no warnings 'nonchar';
my $file = tempfile();
open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; # fail loudly instead of printing to a closed handle
print $fh "\x{D800}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Code point 0x110000 is not Unicode, may not be portable at - line 8.
########
# NAME print to a :utf8 handle with 'non_unicode' warnings disabled
require "../test.pl";
use warnings 'utf8';
no warnings 'non_unicode';
my $file = tempfile();
open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; # fail loudly instead of printing to a closed handle
print $fh "\x{D800}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Unicode non-character U+FFFF is illegal for open interchange at - line 7.
########
# NAME C<use warnings "nonchar"> works in isolation
require "../test.pl";
use warnings 'nonchar';
my $file = tempfile();
open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; # fail loudly instead of printing to a closed handle
print $fh "\x{FFFF}", "\n";
close $fh;
EXPECT
Unicode non-character U+FFFF is illegal for open interchange at - line 5.
########
# NAME C<use warnings "surrogate"> works in isolation
require "../test.pl";
use warnings 'surrogate';
my $file = tempfile();
open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; # fail loudly instead of printing to a closed handle
print $fh "\x{D800}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
########
# NAME C<use warnings "non_unicode"> works in isolation
require "../test.pl";
use warnings 'non_unicode';
my $file = tempfile();
open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; # fail loudly instead of printing to a closed handle
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Code point 0x110000 is not Unicode, may not be portable at - line 5.
########
# NAME print to a :utf8 handle is silent under no warnings 'utf8'
require "../test.pl";
no warnings 'utf8';
my $file = tempfile();
open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; # EXPECT is empty, so a silently failed open would let this test pass vacuously
print $fh "\x{D7FF}", "\n";
print $fh "\x{D800}", "\n";
print $fh "\x{DFFF}", "\n";
print $fh "\x{E000}", "\n";
print $fh "\x{FDCF}", "\n";
print $fh "\x{FDD0}", "\n";
print $fh "\x{FDEF}", "\n";
print $fh "\x{FDF0}", "\n";
print $fh "\x{FEFF}", "\n";
print $fh "\x{FFFD}", "\n";
print $fh "\x{FFFE}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{10000}", "\n";
print $fh "\x{1FFFE}", "\n";
print $fh "\x{1FFFF}", "\n";
print $fh "\x{2FFFE}", "\n";
print $fh "\x{2FFFF}", "\n";
print $fh "\x{3FFFE}", "\n";
print $fh "\x{3FFFF}", "\n";
print $fh "\x{4FFFE}", "\n";
print $fh "\x{4FFFF}", "\n";
print $fh "\x{5FFFE}", "\n";
print $fh "\x{5FFFF}", "\n";
print $fh "\x{6FFFE}", "\n";
print $fh "\x{6FFFF}", "\n";
print $fh "\x{7FFFE}", "\n";
print $fh "\x{7FFFF}", "\n";
print $fh "\x{8FFFE}", "\n";
print $fh "\x{8FFFF}", "\n";
print $fh "\x{9FFFE}", "\n";
print $fh "\x{9FFFF}", "\n";
print $fh "\x{AFFFE}", "\n";
print $fh "\x{AFFFF}", "\n";
print $fh "\x{BFFFE}", "\n";
print $fh "\x{BFFFF}", "\n";
print $fh "\x{CFFFE}", "\n";
print $fh "\x{CFFFF}", "\n";
print $fh "\x{DFFFE}", "\n";
print $fh "\x{DFFFF}", "\n";
print $fh "\x{EFFFE}", "\n";
print $fh "\x{EFFFF}", "\n";
print $fh "\x{FFFFE}", "\n";
print $fh "\x{FFFFF}", "\n";
print $fh "\x{100000}", "\n";
print $fh "\x{10FFFE}", "\n";
print $fh "\x{10FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT