1 2 utf8.c AOK 3 4 [utf8_to_uvchr_buf] 5 Malformed UTF-8 character 6 my $a = ord "\x80" ; 7 8 Malformed UTF-8 character 9 my $a = ord "\xf080" ; 10 <<<<<< this warning can't be easily triggered from perl anymore 11 12 [utf16_to_utf8] 13 Malformed UTF-16 surrogate 14 <<<<<< Add a test when something actually calls utf16_to_utf8 15 16__END__ 17# utf8.c [utf8_to_uvchr_buf] -W 18# NAME Malformed under 'use utf8' in double-quoted string 19BEGIN { 20 if (ord('A') == 193) { 21 print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings."; 22 exit 0; 23 } 24} 25use utf8 ; 26no warnings; # Malformed is a fatal error, so gets output anyway. 27my $a = "sn�storm" ; 28EXPECT 29Malformed UTF-8 character: \xf8\x73\x74\x6f\x72 (unexpected non-continuation byte 0x73, immediately after start byte 0xf8; need 5 bytes, got 1) at - line 10. 30Malformed UTF-8 character (fatal) at - line 10. 31######## 32# NAME Malformed under 'use utf8' in single-quoted string 33BEGIN { 34 if (ord('A') == 193) { 35 print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings."; 36 exit 0; 37 } 38} 39use utf8 ; 40no warnings; # Malformed is a fatal error, so gets output anyway. 41my $a = 'sn�storm' ; 42EXPECT 43Malformed UTF-8 character: \xf8\x73\x74\x6f\x72 (unexpected non-continuation byte 0x73, immediately after start byte 0xf8; need 5 bytes, got 1) at - line 9. 44Malformed UTF-8 character (fatal) at - line 9. 
45######## 46use warnings 'utf8'; 47my $d7ff = uc(chr(0xD7FF)); 48my $d800 = uc(chr(0xD800)); 49my $dfff = uc(chr(0xDFFF)); 50my $e000 = uc(chr(0xE000)); 51my $feff = uc(chr(0xFEFF)); 52my $fffd = uc(chr(0xFFFD)); 53my $fffe = uc(chr(0xFFFE)); 54my $ffff = uc(chr(0xFFFF)); 55my $hex4 = uc(chr(0x10000)); 56my $hex5 = uc(chr(0x100000)); 57my $maxm1 = uc(chr(0x10FFFE)); 58my $max = uc(chr(0x10FFFF)); 59my $nonUnicode = uc(chr(0x110000)); 60no warnings 'utf8'; 61my $d7ff = uc(chr(0xD7FF)); 62my $d800 = uc(chr(0xD800)); 63my $dfff = uc(chr(0xDFFF)); 64my $e000 = uc(chr(0xE000)); 65my $feff = uc(chr(0xFEFF)); 66my $fffd = uc(chr(0xFFFD)); 67my $fffe = uc(chr(0xFFFE)); 68my $ffff = uc(chr(0xFFFF)); 69my $hex4 = uc(chr(0x10000)); 70my $hex5 = uc(chr(0x100000)); 71my $maxm1 = uc(chr(0x10FFFE)); 72my $max = uc(chr(0x10FFFF)); 73my $nonUnicode = uc(chr(0x110000)); 74EXPECT 75Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3. 76Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4. 77Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14. 78######## 79use warnings 'utf8'; 80my $d800 = uc(chr(0xD800)); 81my $nonUnicode = uc(chr(0x110000)); 82no warnings 'surrogate'; 83my $d800 = uc(chr(0xD800)); 84my $nonUnicode = uc(chr(0x110000)); 85EXPECT 86Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2. 87Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3. 88Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6. 89######## 90use warnings 'utf8'; 91my $d800 = uc(chr(0xD800)); 92my $nonUnicode = uc(chr(0x110000)); 93no warnings 'non_unicode'; 94my $d800 = uc(chr(0xD800)); 95my $nonUnicode = uc(chr(0x110000)); 96EXPECT 97Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2. 98Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3. 
99Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 5. 100######## 101use warnings 'utf8'; 102my $big_nonUnicode = uc(chr(0x7FFF_FFFF)); 103no warnings 'non_unicode'; 104my $big_nonUnicode = uc(chr(0x7FFF_FFFF)); 105EXPECT 106Operation "uc" returns its argument for non-Unicode code point 0x7FFFFFFF at - line 2. 107######## 108use warnings 'utf8'; 109my $d7ff = lc pack("U", 0xD7FF); 110my $d800 = lc pack("U", 0xD800); 111my $dfff = lc pack("U", 0xDFFF); 112my $e000 = lc pack("U", 0xE000); 113my $feff = lc pack("U", 0xFEFF); 114my $fffd = lc pack("U", 0xFFFD); 115my $fffe = lc pack("U", 0xFFFE); 116my $ffff = lc pack("U", 0xFFFF); 117my $hex4 = lc pack("U", 0x10000); 118my $hex5 = lc pack("U", 0x100000); 119my $maxm1 = lc pack("U", 0x10FFFE); 120my $max = lc pack("U", 0x10FFFF); 121my $nonUnicode = lc(pack("U", 0x110000)); 122no warnings 'utf8'; 123my $d7ff = lc pack("U", 0xD7FF); 124my $d800 = lc pack("U", 0xD800); 125my $dfff = lc pack("U", 0xDFFF); 126my $e000 = lc pack("U", 0xE000); 127my $feff = lc pack("U", 0xFEFF); 128my $fffd = lc pack("U", 0xFFFD); 129my $fffe = lc pack("U", 0xFFFE); 130my $ffff = lc pack("U", 0xFFFF); 131my $hex4 = lc pack("U", 0x10000); 132my $hex5 = lc pack("U", 0x100000); 133my $maxm1 = lc pack("U", 0x10FFFE); 134my $max = lc pack("U", 0x10FFFF); 135my $nonUnicode = lc(pack("U", 0x110000)); 136EXPECT 137Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3. 138Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4. 139Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14. 
########
# NAME ucfirst() warns for surrogates and above-Unicode code points only while 'utf8' warnings are on
use warnings 'utf8';
my $d7ff = ucfirst "\x{D7FF}";
my $d800 = ucfirst "\x{D800}";
my $dfff = ucfirst "\x{DFFF}";
my $e000 = ucfirst "\x{E000}";
my $feff = ucfirst "\x{FEFF}";
my $fffd = ucfirst "\x{FFFD}";
my $fffe = ucfirst "\x{FFFE}";
my $ffff = ucfirst "\x{FFFF}";
my $hex4 = ucfirst "\x{10000}";
my $hex5 = ucfirst "\x{100000}";
my $maxm1 = ucfirst "\x{10FFFE}";
my $max = ucfirst "\x{10FFFF}";
my $nonUnicode = ucfirst "\x{110000}";
no warnings 'utf8';
my $d7ff = ucfirst "\x{D7FF}";
my $d800 = ucfirst "\x{D800}";
my $dfff = ucfirst "\x{DFFF}";
my $e000 = ucfirst "\x{E000}";
my $feff = ucfirst "\x{FEFF}";
my $fffd = ucfirst "\x{FFFD}";
my $fffe = ucfirst "\x{FFFE}";
my $ffff = ucfirst "\x{FFFF}";
my $hex4 = ucfirst "\x{10000}";
my $hex5 = ucfirst "\x{100000}";
my $maxm1 = ucfirst "\x{10FFFE}";
my $max = ucfirst "\x{10FFFF}";
my $nonUnicode = ucfirst "\x{110000}";
EXPECT
Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3.
Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14.
173######## 174# NAME Matching \p{} against above-Unicode 175use warnings 'utf8'; 176chr(0xD7FF) =~ /\p{Any}/; 177chr(0xD800) =~ /\p{Any}/; 178chr(0xDFFF) =~ /\p{Any}/; 179chr(0xE000) =~ /\p{Any}/; 180chr(0xFEFF) =~ /\p{Any}/; 181chr(0xFFFD) =~ /\p{Any}/; 182chr(0xFFFE) =~ /\p{Any}/; 183chr(0xFFFF) =~ /\p{Any}/; 184chr(0x10000) =~ /\p{Any}/; 185chr(0x100000) =~ /\p{Any}/; 186chr(0x10FFFE) =~ /\p{Any}/; 187chr(0x10FFFF) =~ /\p{Any}/; 188chr(0x110000) =~ /[\p{Any}]/; 189chr(0x110001) =~ /[\w\p{Any}]/; 190chr(0x10FFFF) =~ /\p{All}/; 191chr(0x110002) =~ /[\w\p{All}]/; 192chr(0x110003) =~ /[\p{XPosixWord}]/; 193chr(0x110004) =~ /[\P{XPosixWord}]/; 194chr(0x110005) =~ /^[\p{Unassigned}]/; 195chr(0x110006) =~ /^[\P{Unassigned}]/; 196# Only Unicode properties give non-Unicode warnings, and only those properties 197# which do match above Unicode; and not when something else in the class 198# matches above Unicode. Below we test three ways where something outside the 199# property may match non-Unicode: a code point above it, a class \S that we 200# know at compile time doesn't, and a class \W whose values aren't (at the time 201# of this writing) specified at compile time, but which wouldn't match 202chr(0x110050) =~ /\w/; 203chr(0x110051) =~ /\W/; 204chr(0x110052) =~ /\d/; 205chr(0x110053) =~ /\D/; 206chr(0x110054) =~ /\s/; 207chr(0x110055) =~ /\S/; 208chr(0x110056) =~ /[[:word:]]/; 209chr(0x110057) =~ /[[:^word:]]/; 210chr(0x110058) =~ /[[:alnum:]]/; 211chr(0x110059) =~ /[[:^alnum:]]/; 212chr(0x11005A) =~ /[[:space:]]/; 213chr(0x11005B) =~ /[[:^space:]]/; 214chr(0x11005C) =~ /[[:digit:]]/; 215chr(0x11005D) =~ /[[:^digit:]]/; 216chr(0x11005E) =~ /[[:alpha:]]/; 217chr(0x11005F) =~ /[[:^alpha:]]/; 218chr(0x110060) =~ /[[:ascii:]]/; 219chr(0x110061) =~ /[[:^ascii:]]/; 220chr(0x110062) =~ /[[:cntrl:]]/; 221chr(0x110063) =~ /[[:^cntrl:]]/; 222chr(0x110064) =~ /[[:graph:]]/; 223chr(0x110065) =~ /[[:^graph:]]/; 224chr(0x110066) =~ /[[:lower:]]/; 225chr(0x110067) =~ 
/[[:^lower:]]/; 226chr(0x110068) =~ /[[:print:]]/; 227chr(0x110069) =~ /[[:^print:]]/; 228chr(0x11006A) =~ /[[:punct:]]/; 229chr(0x11006B) =~ /[[:^punct:]]/; 230chr(0x11006C) =~ /[[:upper:]]/; 231chr(0x11006D) =~ /[[:^upper:]]/; 232chr(0x11006E) =~ /[[:xdigit:]]/; 233chr(0x11006F) =~ /[[:^xdigit:]]/; 234chr(0x110070) =~ /[[:blank:]]/; 235chr(0x110071) =~ /[[:^blank:]]/; 236chr(0x111010) =~ /[\W\p{Unassigned}]/; 237chr(0x111011) =~ /[\W\P{Unassigned}]/; 238chr(0x112010) =~ /[\S\p{Unassigned}]/; 239chr(0x112011) =~ /[\S\P{Unassigned}]/; 240chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/; 241chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/; 242no warnings 'utf8'; 243chr(0xD7FF) =~ /\p{Any}/; 244chr(0xD800) =~ /\p{Any}/; 245chr(0xDFFF) =~ /\p{Any}/; 246chr(0xE000) =~ /\p{Any}/; 247chr(0xFEFF) =~ /\p{Any}/; 248chr(0xFFFD) =~ /\p{Any}/; 249chr(0xFFFE) =~ /\p{Any}/; 250chr(0xFFFF) =~ /\p{Any}/; 251chr(0x10000) =~ /\p{Any}/; 252chr(0x100000) =~ /\p{Any}/; 253chr(0x10FFFE) =~ /\p{Any}/; 254chr(0x10FFFF) =~ /\p{Any}/; 255chr(0x110000) =~ /[\p{Any}]/; 256chr(0x110001) =~ /[\w\p{Any}]/; 257chr(0x10FFFF) =~ /\p{All}/; 258chr(0x110002) =~ /[\w\p{All}]/; 259chr(0x110003) =~ /[\p{XPosixWord}]/; 260chr(0x110004) =~ /[\P{XPosixWord}]/; 261chr(0x110005) =~ /^[\p{Unassigned}]/; 262chr(0x110006) =~ /^[\P{Unassigned}]/; 263chr(0x110050) =~ /\w/; 264chr(0x110051) =~ /\W/; 265chr(0x110052) =~ /\d/; 266chr(0x110053) =~ /\D/; 267chr(0x110054) =~ /\s/; 268chr(0x110055) =~ /\S/; 269chr(0x110056) =~ /[[:word:]]/; 270chr(0x110057) =~ /[[:^word:]]/; 271chr(0x110058) =~ /[[:alnum:]]/; 272chr(0x110059) =~ /[[:^alnum:]]/; 273chr(0x11005A) =~ /[[:space:]]/; 274chr(0x11005B) =~ /[[:^space:]]/; 275chr(0x11005C) =~ /[[:digit:]]/; 276chr(0x11005D) =~ /[[:^digit:]]/; 277chr(0x11005E) =~ /[[:alpha:]]/; 278chr(0x11005F) =~ /[[:^alpha:]]/; 279chr(0x110060) =~ /[[:ascii:]]/; 280chr(0x110061) =~ /[[:^ascii:]]/; 281chr(0x110062) =~ /[[:cntrl:]]/; 282chr(0x110063) =~ /[[:^cntrl:]]/; 283chr(0x110064) =~ 
/[[:graph:]]/; 284chr(0x110065) =~ /[[:^graph:]]/; 285chr(0x110066) =~ /[[:lower:]]/; 286chr(0x110067) =~ /[[:^lower:]]/; 287chr(0x110068) =~ /[[:print:]]/; 288chr(0x110069) =~ /[[:^print:]]/; 289chr(0x11006A) =~ /[[:punct:]]/; 290chr(0x11006B) =~ /[[:^punct:]]/; 291chr(0x11006C) =~ /[[:upper:]]/; 292chr(0x11006D) =~ /[[:^upper:]]/; 293chr(0x11006E) =~ /[[:xdigit:]]/; 294chr(0x11006F) =~ /[[:^xdigit:]]/; 295chr(0x110070) =~ /[[:blank:]]/; 296chr(0x110071) =~ /[[:^blank:]]/; 297chr(0x111010) =~ /[\W\p{Unassigned}]/; 298chr(0x111011) =~ /[\W\P{Unassigned}]/; 299chr(0x112010) =~ /[\S\p{Unassigned}]/; 300chr(0x112011) =~ /[\S\P{Unassigned}]/; 301chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/; 302chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/; 303EXPECT 304Matched non-Unicode code point 0x110005 against Unicode property; may not be portable at - line 20. 305Matched non-Unicode code point 0x110006 against Unicode property; may not be portable at - line 21. 306######## 307# NAME Matching Unicode property against above-Unicode code point outputs a warning even if optimizer rejects the match (in synthetic start class) 308# Now have to make FATAL to guarantee being output 309use warnings FATAL => 'non_unicode'; 310"\x{110000}" =~ /b?\p{Space}/; 311EXPECT 312Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3. 
########
# NAME Matching POSIX class property against above-Unicode code point doesn't output a warning
use warnings 'non_unicode';
use warnings FATAL => 'non_unicode';
"\x{110000}" =~ /b?[[:space:]]/;
EXPECT
########
# NAME Matching \p{Any} against above-Unicode code point doesn't output a warning
use warnings 'utf8';
chr(0x110000) =~ /\p{Any}/;
EXPECT
########
# NAME utf8, non_unicode warnings categories work on Matched non-Unicode code point warning
use warnings qw(utf8 non_unicode);
chr(0x110000) =~ /^\p{Unassigned}/;
no warnings 'non_unicode';
chr(0x110001) =~ /\p{Unassigned}/;
use warnings 'non_unicode';
no warnings 'utf8';
chr(0x110002) =~ /\p{Unassigned}/;
EXPECT
Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 2.
########
# NAME optimizable regnode should still give non_unicode warnings when fatalized
use warnings 'utf8';
use warnings FATAL => 'non_unicode';
chr(0x110000) =~ /\p{lb=cr}/;
EXPECT
Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
340######## 341# NAME optimizable regnode should not give non_unicode warnings when warnings are off 342no warnings 'non_unicode'; 343chr(0x110000) =~ /\p{lb=cr}/; 344EXPECT 345######## 346# NAME 'All' matches above-Unicode without any warning 347use warnings qw(utf8 non_unicode); 348chr(0x110000) =~ /\p{All}/; 349EXPECT 350######## 351require "../test.pl"; 352use warnings 'utf8'; 353sub Is_Super { return '!utf8::Any' } 354# The extra char is to avoid an optimization that avoids the problem when the 355# property is the only non-latin1 char in a class 356print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n"; 357EXPECT 3581 359######## 360require "../test.pl"; 361use warnings 'utf8'; 362my $file = tempfile(); 363open(my $fh, "+>:utf8", $file); 364print $fh "\x{D7FF}", "\n"; 365print $fh "\x{D800}", "\n"; 366print $fh "\x{D900}", "\n"; 367print $fh "\x{DA00}", "\n"; 368print $fh "\x{DB00}", "\n"; 369print $fh "\x{DC00}", "\n"; 370print $fh "\x{DD00}", "\n"; 371print $fh "\x{DE00}", "\n"; 372print $fh "\x{DF00}", "\n"; 373print $fh "\x{DFFF}", "\n"; 374print $fh "\x{E000}", "\n"; 375print $fh "\x{FDCF}", "\n"; 376print $fh "\x{FDD0}", "\n"; 377print $fh "\x{FDD1}", "\n"; 378print $fh "\x{FDEF}", "\n"; 379print $fh "\x{FDF0}", "\n"; 380print $fh "\x{FDFE}", "\n"; 381print $fh "\x{FDFF}", "\n"; 382print $fh "\x{FE00}", "\n"; 383print $fh "\x{FEFF}", "\n"; 384print $fh "\x{FFFD}", "\n"; 385print $fh "\x{FFFE}", "\n"; 386print $fh "\x{FFFF}", "\n"; 387print $fh "\x{10000}", "\n"; 388print $fh "\x{1FFFD}", "\n"; 389print $fh "\x{1FFFE}", "\n"; 390print $fh "\x{1FFFF}", "\n"; 391print $fh "\x{20000}", "\n"; 392print $fh "\x{2FFFD}", "\n"; 393print $fh "\x{2FFFE}", "\n"; 394print $fh "\x{2FFFF}", "\n"; 395print $fh "\x{30000}", "\n"; 396print $fh "\x{3FFFD}", "\n"; 397print $fh "\x{3FFFE}", "\n"; 398print $fh "\x{3FFFF}", "\n"; 399print $fh "\x{40000}", "\n"; 400print $fh "\x{4FFFD}", "\n"; 401print $fh "\x{4FFFE}", "\n"; 402print $fh "\x{4FFFF}", "\n"; 403print $fh 
"\x{50000}", "\n"; 404print $fh "\x{5FFFD}", "\n"; 405print $fh "\x{5FFFE}", "\n"; 406print $fh "\x{5FFFF}", "\n"; 407print $fh "\x{60000}", "\n"; 408print $fh "\x{6FFFD}", "\n"; 409print $fh "\x{6FFFE}", "\n"; 410print $fh "\x{6FFFF}", "\n"; 411print $fh "\x{70000}", "\n"; 412print $fh "\x{7FFFD}", "\n"; 413print $fh "\x{7FFFE}", "\n"; 414print $fh "\x{7FFFF}", "\n"; 415print $fh "\x{80000}", "\n"; 416print $fh "\x{8FFFD}", "\n"; 417print $fh "\x{8FFFE}", "\n"; 418print $fh "\x{8FFFF}", "\n"; 419print $fh "\x{90000}", "\n"; 420print $fh "\x{9FFFD}", "\n"; 421print $fh "\x{9FFFE}", "\n"; 422print $fh "\x{9FFFF}", "\n"; 423print $fh "\x{A0000}", "\n"; 424print $fh "\x{AFFFD}", "\n"; 425print $fh "\x{AFFFE}", "\n"; 426print $fh "\x{AFFFF}", "\n"; 427print $fh "\x{B0000}", "\n"; 428print $fh "\x{BFFFD}", "\n"; 429print $fh "\x{BFFFE}", "\n"; 430print $fh "\x{BFFFF}", "\n"; 431print $fh "\x{C0000}", "\n"; 432print $fh "\x{CFFFD}", "\n"; 433print $fh "\x{CFFFE}", "\n"; 434print $fh "\x{CFFFF}", "\n"; 435print $fh "\x{D0000}", "\n"; 436print $fh "\x{DFFFD}", "\n"; 437print $fh "\x{DFFFE}", "\n"; 438print $fh "\x{DFFFF}", "\n"; 439print $fh "\x{E0000}", "\n"; 440print $fh "\x{EFFFD}", "\n"; 441print $fh "\x{EFFFE}", "\n"; 442print $fh "\x{EFFFF}", "\n"; 443print $fh "\x{F0000}", "\n"; 444print $fh "\x{FFFFD}", "\n"; 445print $fh "\x{FFFFE}", "\n"; 446print $fh "\x{FFFFF}", "\n"; 447print $fh "\x{100000}", "\n"; 448print $fh "\x{10FFFD}", "\n"; 449print $fh "\x{10FFFE}", "\n"; 450print $fh "\x{10FFFF}", "\n"; 451print $fh "\x{110000}", "\n"; 452print $fh "\x{11FFFD}", "\n"; 453print $fh "\x{11FFFE}", "\n"; 454print $fh "\x{11FFFF}", "\n"; 455print $fh "\x{120000}", "\n"; 456close $fh; 457EXPECT 458Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. 459Unicode surrogate U+D900 is illegal in UTF-8 at - line 7. 460Unicode surrogate U+DA00 is illegal in UTF-8 at - line 8. 461Unicode surrogate U+DB00 is illegal in UTF-8 at - line 9. 
462Unicode surrogate U+DC00 is illegal in UTF-8 at - line 10. 463Unicode surrogate U+DD00 is illegal in UTF-8 at - line 11. 464Unicode surrogate U+DE00 is illegal in UTF-8 at - line 12. 465Unicode surrogate U+DF00 is illegal in UTF-8 at - line 13. 466Unicode surrogate U+DFFF is illegal in UTF-8 at - line 14. 467Unicode non-character U+FDD0 is not recommended for open interchange in print at - line 17. 468Unicode non-character U+FDD1 is not recommended for open interchange in print at - line 18. 469Unicode non-character U+FDEF is not recommended for open interchange in print at - line 19. 470Unicode non-character U+FFFE is not recommended for open interchange in print at - line 26. 471Unicode non-character U+FFFF is not recommended for open interchange in print at - line 27. 472Unicode non-character U+1FFFE is not recommended for open interchange in print at - line 30. 473Unicode non-character U+1FFFF is not recommended for open interchange in print at - line 31. 474Unicode non-character U+2FFFE is not recommended for open interchange in print at - line 34. 475Unicode non-character U+2FFFF is not recommended for open interchange in print at - line 35. 476Unicode non-character U+3FFFE is not recommended for open interchange in print at - line 38. 477Unicode non-character U+3FFFF is not recommended for open interchange in print at - line 39. 478Unicode non-character U+4FFFE is not recommended for open interchange in print at - line 42. 479Unicode non-character U+4FFFF is not recommended for open interchange in print at - line 43. 480Unicode non-character U+5FFFE is not recommended for open interchange in print at - line 46. 481Unicode non-character U+5FFFF is not recommended for open interchange in print at - line 47. 482Unicode non-character U+6FFFE is not recommended for open interchange in print at - line 50. 483Unicode non-character U+6FFFF is not recommended for open interchange in print at - line 51. 
484Unicode non-character U+7FFFE is not recommended for open interchange in print at - line 54. 485Unicode non-character U+7FFFF is not recommended for open interchange in print at - line 55. 486Unicode non-character U+8FFFE is not recommended for open interchange in print at - line 58. 487Unicode non-character U+8FFFF is not recommended for open interchange in print at - line 59. 488Unicode non-character U+9FFFE is not recommended for open interchange in print at - line 62. 489Unicode non-character U+9FFFF is not recommended for open interchange in print at - line 63. 490Unicode non-character U+AFFFE is not recommended for open interchange in print at - line 66. 491Unicode non-character U+AFFFF is not recommended for open interchange in print at - line 67. 492Unicode non-character U+BFFFE is not recommended for open interchange in print at - line 70. 493Unicode non-character U+BFFFF is not recommended for open interchange in print at - line 71. 494Unicode non-character U+CFFFE is not recommended for open interchange in print at - line 74. 495Unicode non-character U+CFFFF is not recommended for open interchange in print at - line 75. 496Unicode non-character U+DFFFE is not recommended for open interchange in print at - line 78. 497Unicode non-character U+DFFFF is not recommended for open interchange in print at - line 79. 498Unicode non-character U+EFFFE is not recommended for open interchange in print at - line 82. 499Unicode non-character U+EFFFF is not recommended for open interchange in print at - line 83. 500Unicode non-character U+FFFFE is not recommended for open interchange in print at - line 86. 501Unicode non-character U+FFFFF is not recommended for open interchange in print at - line 87. 502Unicode non-character U+10FFFE is not recommended for open interchange in print at - line 90. 503Unicode non-character U+10FFFF is not recommended for open interchange in print at - line 91. 
504Code point 0x110000 is not Unicode, may not be portable in print at - line 92. 505Code point 0x11FFFD is not Unicode, may not be portable in print at - line 93. 506Code point 0x11FFFE is not Unicode, may not be portable in print at - line 94. 507Code point 0x11FFFF is not Unicode, may not be portable in print at - line 95. 508Code point 0x120000 is not Unicode, may not be portable in print at - line 96. 509######## 510require "../test.pl"; 511use warnings 'utf8'; 512my $file = tempfile(); 513open(my $fh, "+>:utf8", $file); 514print $fh "\x{D800}", "\n"; 515print $fh "\x{FFFF}", "\n"; 516print $fh "\x{110000}", "\n"; 517close $fh; 518EXPECT 519Unicode surrogate U+D800 is illegal in UTF-8 at - line 5. 520Unicode non-character U+FFFF is not recommended for open interchange in print at - line 6. 521Code point 0x110000 is not Unicode, may not be portable in print at - line 7. 522######## 523require "../test.pl"; 524use warnings 'utf8'; 525no warnings 'surrogate'; 526my $file = tempfile(); 527open(my $fh, "+>:utf8", $file); 528print $fh "\x{D800}", "\n"; 529print $fh "\x{FFFF}", "\n"; 530print $fh "\x{110000}", "\n"; 531close $fh; 532EXPECT 533Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7. 534Code point 0x110000 is not Unicode, may not be portable in print at - line 8. 535######## 536require "../test.pl"; 537use warnings 'utf8'; 538no warnings 'nonchar'; 539my $file = tempfile(); 540open(my $fh, "+>:utf8", $file); 541print $fh "\x{D800}", "\n"; 542print $fh "\x{FFFF}", "\n"; 543print $fh "\x{110000}", "\n"; 544close $fh; 545EXPECT 546Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. 547Code point 0x110000 is not Unicode, may not be portable in print at - line 8. 
########
# NAME print to :utf8 handle: no warnings 'non_unicode' silences only the above-Unicode warning
require "../test.pl";
use warnings 'utf8';
no warnings 'non_unicode';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7.
########
# NAME C<use warnings "nonchar"> works in isolation
require "../test.pl";
use warnings 'nonchar';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{FFFF}", "\n";
close $fh;
EXPECT
Unicode non-character U+FFFF is not recommended for open interchange in print at - line 5.
########
# NAME C<use warnings "surrogate"> works in isolation
require "../test.pl";
use warnings 'surrogate';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
########
# NAME C<use warnings "non_unicode"> works in isolation
require "../test.pl";
use warnings 'non_unicode';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Code point 0x110000 is not Unicode, may not be portable in print at - line 5.
591######## 592require "../test.pl"; 593no warnings 'utf8'; 594my $file = tempfile(); 595open(my $fh, "+>:utf8", $file); 596print $fh "\x{D7FF}", "\n"; 597print $fh "\x{D800}", "\n"; 598print $fh "\x{DFFF}", "\n"; 599print $fh "\x{E000}", "\n"; 600print $fh "\x{FDCF}", "\n"; 601print $fh "\x{FDD0}", "\n"; 602print $fh "\x{FDEF}", "\n"; 603print $fh "\x{FDF0}", "\n"; 604print $fh "\x{FEFF}", "\n"; 605print $fh "\x{FFFD}", "\n"; 606print $fh "\x{FFFE}", "\n"; 607print $fh "\x{FFFF}", "\n"; 608print $fh "\x{10000}", "\n"; 609print $fh "\x{1FFFE}", "\n"; 610print $fh "\x{1FFFF}", "\n"; 611print $fh "\x{2FFFE}", "\n"; 612print $fh "\x{2FFFF}", "\n"; 613print $fh "\x{3FFFE}", "\n"; 614print $fh "\x{3FFFF}", "\n"; 615print $fh "\x{4FFFE}", "\n"; 616print $fh "\x{4FFFF}", "\n"; 617print $fh "\x{5FFFE}", "\n"; 618print $fh "\x{5FFFF}", "\n"; 619print $fh "\x{6FFFE}", "\n"; 620print $fh "\x{6FFFF}", "\n"; 621print $fh "\x{7FFFE}", "\n"; 622print $fh "\x{7FFFF}", "\n"; 623print $fh "\x{8FFFE}", "\n"; 624print $fh "\x{8FFFF}", "\n"; 625print $fh "\x{9FFFE}", "\n"; 626print $fh "\x{9FFFF}", "\n"; 627print $fh "\x{AFFFE}", "\n"; 628print $fh "\x{AFFFF}", "\n"; 629print $fh "\x{BFFFE}", "\n"; 630print $fh "\x{BFFFF}", "\n"; 631print $fh "\x{CFFFE}", "\n"; 632print $fh "\x{CFFFF}", "\n"; 633print $fh "\x{DFFFE}", "\n"; 634print $fh "\x{DFFFF}", "\n"; 635print $fh "\x{EFFFE}", "\n"; 636print $fh "\x{EFFFF}", "\n"; 637print $fh "\x{FFFFE}", "\n"; 638print $fh "\x{FFFFF}", "\n"; 639print $fh "\x{100000}", "\n"; 640print $fh "\x{10FFFE}", "\n"; 641print $fh "\x{10FFFF}", "\n"; 642print $fh "\x{110000}", "\n"; 643close $fh; 644EXPECT 645######## 646# NAME Case change crosses 255/256 under non-UTF8 locale 647require '../loc_tools.pl'; 648unless (locales_enabled('LC_CTYPE')) { 649 print("SKIPPED\n# locales not available\n"),exit; 650} 651eval { require POSIX; POSIX->import("locale_h") }; 652if ($@) { 653 print("SKIPPED\n# no POSIX\n"),exit; 654} 655use warnings 'locale'; 656use feature 
'fc'; 657use locale; 658setlocale(&POSIX::LC_CTYPE, "C"); 659my $a; 660$a = lc("\x{178}"); 661$a = fc("\x{1E9E}"); 662$a = fc("\x{FB05}"); 663$a = uc("\x{FB00}"); 664$a = ucfirst("\x{149}"); 665$a = lcfirst("\x{178}"); 666no warnings 'locale'; 667$a = lc("\x{178}"); 668$a = fc("\x{1E9E}"); 669$a = fc("\x{FB05}"); 670$a = uc("\x{FB00}"); 671$a = ucfirst("\x{149}"); 672$a = lcfirst("\x{178}"); 673EXPECT 674Can't do lc("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 14. 675Can't do fc("\x{1E9E}") on non-UTF-8 locale; resolved to "\x{17F}\x{17F}". at - line 15. 676Can't do fc("\x{FB05}") on non-UTF-8 locale; resolved to "\x{FB06}". at - line 16. 677Can't do uc("\x{FB00}") on non-UTF-8 locale; resolved to "\x{FB00}". at - line 17. 678Can't do ucfirst("\x{149}") on non-UTF-8 locale; resolved to "\x{149}". at - line 18. 679Can't do lcfirst("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 19. 680######## 681# NAME Wide character in non-UTF-8 locale 682require '../loc_tools.pl'; 683unless (locales_enabled('LC_CTYPE')) { 684 print("SKIPPED\n# locales not available\n"),exit; 685} 686eval { require POSIX; POSIX->import("locale_h") }; 687if ($@) { 688 print("SKIPPED\n# no POSIX\n"),exit; 689} 690use warnings 'locale'; 691use feature 'fc'; 692use locale; 693setlocale(&POSIX::LC_CTYPE, "C"); 694my $a; 695$a = lc("\x{100}"); 696$a = lcfirst("\x{101}"); 697$a = fc("\x{102}"); 698$a = uc("\x{103}"); 699$a = ucfirst("\x{104}"); 700no warnings 'locale'; 701$a = lc("\x{100}"); 702$a = lcfirst("\x{101}"); 703$a = fc("\x{102}"); 704$a = uc("\x{103}"); 705$a = ucfirst("\x{104}"); 706EXPECT 707Wide character (U+100) in lc at - line 14. 708Wide character (U+101) in lcfirst at - line 15. 709Wide character (U+102) in fc at - line 16. 710Wide character (U+103) in uc at - line 17. 711Wide character (U+104) in ucfirst at - line 18. 
########
# NAME Wide character in UTF-8 locale
# Counterpart of the non-UTF-8 locale test: the same case-changing calls on
# wide characters are expected to produce no output at all (empty EXPECT).
require '../loc_tools.pl';
unless (locales_enabled('LC_CTYPE')) {
    print("SKIPPED\n# locales not available\n"),exit;
}
eval { require POSIX; POSIX->import("locale_h") };
if ($@) {
    print("SKIPPED\n# no POSIX\n"),exit;
}
my @utf8_locales = find_utf8_ctype_locale();
unless (@utf8_locales) {
    print("SKIPPED\n# no UTF-8 locales\n"),exit;
}
use warnings 'locale';
use feature 'fc';
use locale;
setlocale(&POSIX::LC_CTYPE, $utf8_locales[0]);
my $a;
$a = lc("\x{100}");
$a = lcfirst("\x{101}");
$a = fc("\x{102}");
$a = uc("\x{103}");
$a = ucfirst("\x{104}");
EXPECT
########
# NAME Fatality of too-large code points, but IV_MAX works, warns
# The expectations below are regexes with "\d+" line numbers, so comment
# lines in this program do not affect the result.
require "../test.pl";
use warnings 'non_unicode';
my $max_cp = ~0 >> 1;               # the value the test name calls IV_MAX
my $max_char = chr $max_cp;
my $temp = qr/$max_char/;
$temp = uc($max_char);
$max_char =~ /[\x{110000}\P{Unassigned}]/;
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh $max_char, "\n";
close $fh;
my $error_cp = $max_cp + 1;         # one past the maximum
my $error_char = chr $error_cp;     # expected to die: "Use of code point 0x80+ is not allowed"
EXPECT
OPTIONS fatal regex
Operation "uc" returns its argument for non-Unicode code point 0x7F+ at - line \d+.
Code point 0x7F+ is not Unicode, (may not be|requires a Perl extension, and so is not) portable in print at - line \d+.
Use of code point 0x80+ is not allowed; the permissible max is 0x7F+\ at - line \d+.
########
# NAME [perl #127262]
BEGIN{
    if (ord('A') == 193) {
        print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings.";
        exit 0;
    }
    use Config;
    unless ($Config{d_double_style_ieee}) {    # %Config is what 'use Config' exports; an undeclared hash here would always skip
        print "SKIPPED\n# non-IEEE fp range.";
        exit 0;
    }
{};$^H=eval'2**400'}�
EXPECT
Malformed UTF-8 character: \xc2\x0a (unexpected non-continuation byte 0x0a, immediately after start byte 0xc2; need 2 bytes, got 1) at - line 11.
########
# NAME [perl #131646]
BEGIN{
    if (ord('A') == 193) {    # 'A' is 193 on EBCDIC platforms, whose warnings differ (see message below)
        print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings.";
        exit 0;
    }
}
no warnings;
use warnings 'utf8';    # after the blanket 'no warnings', only the 'utf8' category is enabled
for(uc 0..t){0~~pack"UXc",exp}    # terse reproducer; the warning is expected "in pack" - presumably must stay verbatim
EXPECT
OPTIONS regex
Malformed UTF-8 character: \\x([[:xdigit:]]{2})\\x([[:xdigit:]]{2}) \(unexpected non-continuation byte 0x\2, immediately after start byte 0x\1; need 2 bytes, got 1\) in pack at - line 9.
Malformed UTF-8 character \(fatal\) at - line 9.