
  utf8.c AOK

     [utf8_to_uvchr_buf]
     Malformed UTF-8 character
        my $a = ord "\x80" ;

     Malformed UTF-8 character
        my $a = ord "\xf080" ;
     <<<<<< this warning can't be easily triggered from perl anymore

     [utf16_to_utf8]
     Malformed UTF-16 surrogate
     <<<<<< Add a test when something actually calls utf16_to_utf8

__END__
# utf8.c [utf8_to_uvchr_buf] -W
BEGIN {
    if (ord('A') == 193) {  # 193 is the code of 'A' on the platforms this stanza skips
        print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings.";
        exit 0;
    }
}
use utf8 ;  # NOTE(review): the non-ASCII byte in the three strings below must stay the raw byte 0xF8 named in EXPECT; confirm it was not re-encoded
my $a = "sn�storm" ;
{
    no warnings 'utf8' ;  # EXPECT lists no warning for the string on the next line
    my $a = "sn�storm";
    use warnings 'utf8' ;  # re-enabled: EXPECT lists a warning for the next line
    my $a = "sn�storm";
}
EXPECT
Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9.
Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14.
########
# NAME uc() on surrogates and above-Unicode code points warns only under utf8 warnings
use warnings 'utf8';
my $d7ff = uc(chr(0xD7FF));
my $d800 = uc(chr(0xD800));  # surrogate: warning expected
my $dfff = uc(chr(0xDFFF));  # surrogate: warning expected
my $e000 = uc(chr(0xE000));
my $feff = uc(chr(0xFEFF));
my $fffd = uc(chr(0xFFFD));
my $fffe = uc(chr(0xFFFE));
my $ffff = uc(chr(0xFFFF));
my $hex4 = uc(chr(0x10000));
my $hex5 = uc(chr(0x100000));
my $maxm1 = uc(chr(0x10FFFE));
my $max = uc(chr(0x10FFFF));
my $nonUnicode = uc(chr(0x110000));  # non-Unicode: warning expected
no warnings 'utf8';  # same operations again; EXPECT lists nothing for them
my $d7ff = uc(chr(0xD7FF));
my $d800 = uc(chr(0xD800));
my $dfff = uc(chr(0xDFFF));
my $e000 = uc(chr(0xE000));
my $feff = uc(chr(0xFEFF));
my $fffd = uc(chr(0xFFFD));
my $fffe = uc(chr(0xFFFE));
my $ffff = uc(chr(0xFFFF));
my $hex4 = uc(chr(0x10000));
my $hex5 = uc(chr(0x100000));
my $maxm1 = uc(chr(0x10FFFE));
my $max = uc(chr(0x10FFFF));
my $nonUnicode = uc(chr(0x110000));
EXPECT
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14.
########
# NAME no warnings 'surrogate' silences only the uc() surrogate warning
use warnings 'utf8';
my $d800 = uc(chr(0xD800));
my $nonUnicode = uc(chr(0x110000));
no warnings 'surrogate';
my $d800 = uc(chr(0xD800));  # no longer reported
my $nonUnicode = uc(chr(0x110000));  # still reported
EXPECT
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6.
########
# NAME no warnings 'non_unicode' silences only the uc() non-Unicode warning
use warnings 'utf8';
my $d800 = uc(chr(0xD800));
my $nonUnicode = uc(chr(0x110000));
no warnings 'non_unicode';
my $d800 = uc(chr(0xD800));  # still reported
my $nonUnicode = uc(chr(0x110000));  # no longer reported
EXPECT
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 5.
########
# NAME uc() warning for code point 0x80000000 honors the non_unicode category
use warnings 'utf8';
no warnings 'deprecated'; # This is above IV_MAX on 32 bit machines
my $big_nonUnicode = uc(chr(0x8000_0000));
no warnings 'non_unicode';
my $big_nonUnicode = uc(chr(0x8000_0000));  # no longer reported
EXPECT
Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 3.
########
# NAME lc() on surrogates and above-Unicode code points built with pack("U")
use warnings 'utf8';
my $d7ff = lc pack("U", 0xD7FF);
my $d800 = lc pack("U", 0xD800);  # surrogate: warning expected
my $dfff = lc pack("U", 0xDFFF);  # surrogate: warning expected
my $e000 = lc pack("U", 0xE000);
my $feff = lc pack("U", 0xFEFF);
my $fffd = lc pack("U", 0xFFFD);
my $fffe = lc pack("U", 0xFFFE);
my $ffff = lc pack("U", 0xFFFF);
my $hex4 = lc pack("U", 0x10000);
my $hex5 = lc pack("U", 0x100000);
my $maxm1 = lc pack("U", 0x10FFFE);
my $max = lc pack("U", 0x10FFFF);
my $nonUnicode = lc(pack("U", 0x110000));  # non-Unicode: warning expected
no warnings 'utf8';  # same operations again; EXPECT lists nothing for them
my $d7ff = lc pack("U", 0xD7FF);
my $d800 = lc pack("U", 0xD800);
my $dfff = lc pack("U", 0xDFFF);
my $e000 = lc pack("U", 0xE000);
my $feff = lc pack("U", 0xFEFF);
my $fffd = lc pack("U", 0xFFFD);
my $fffe = lc pack("U", 0xFFFE);
my $ffff = lc pack("U", 0xFFFF);
my $hex4 = lc pack("U", 0x10000);
my $hex5 = lc pack("U", 0x100000);
my $maxm1 = lc pack("U", 0x10FFFE);
my $max = lc pack("U", 0x10FFFF);
my $nonUnicode = lc(pack("U", 0x110000));
EXPECT
Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14.
########
# NAME ucfirst() on surrogates and above-Unicode code points written as string escapes
use warnings 'utf8';
my $d7ff = ucfirst "\x{D7FF}";
my $d800 = ucfirst "\x{D800}";  # surrogate: warning expected
my $dfff = ucfirst "\x{DFFF}";  # surrogate: warning expected
my $e000 = ucfirst "\x{E000}";
my $feff = ucfirst "\x{FEFF}";
my $fffd = ucfirst "\x{FFFD}";
my $fffe = ucfirst "\x{FFFE}";
my $ffff = ucfirst "\x{FFFF}";
my $hex4 = ucfirst "\x{10000}";
my $hex5 = ucfirst "\x{100000}";
my $maxm1 = ucfirst "\x{10FFFE}";
my $max = ucfirst "\x{10FFFF}";
my $nonUnicode = ucfirst "\x{110000}";  # non-Unicode: warning expected
no warnings 'utf8';  # same operations again; EXPECT lists nothing for them
my $d7ff = ucfirst "\x{D7FF}";
my $d800 = ucfirst "\x{D800}";
my $dfff = ucfirst "\x{DFFF}";
my $e000 = ucfirst "\x{E000}";
my $feff = ucfirst "\x{FEFF}";
my $fffd = ucfirst "\x{FFFD}";
my $fffe = ucfirst "\x{FFFE}";
my $ffff = ucfirst "\x{FFFF}";
my $hex4 = ucfirst "\x{10000}";
my $hex5 = ucfirst "\x{100000}";
my $maxm1 = ucfirst "\x{10FFFE}";
my $max = ucfirst "\x{10FFFF}";
my $nonUnicode = ucfirst "\x{110000}";
EXPECT
Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3.
Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14.
########
# NAME Matching \p{} against above-Unicode
use warnings 'utf8';
chr(0xD7FF) =~ /\p{Any}/;
chr(0xD800) =~ /\p{Any}/;
chr(0xDFFF) =~ /\p{Any}/;
chr(0xE000) =~ /\p{Any}/;
chr(0xFEFF) =~ /\p{Any}/;
chr(0xFFFD) =~ /\p{Any}/;
chr(0xFFFE) =~ /\p{Any}/;
chr(0xFFFF) =~ /\p{Any}/;
chr(0x10000) =~ /\p{Any}/;
chr(0x100000) =~ /\p{Any}/;
chr(0x10FFFE) =~ /\p{Any}/;
chr(0x10FFFF) =~ /\p{Any}/;
chr(0x110000) =~ /[\p{Any}]/;
chr(0x110001) =~ /[\w\p{Any}]/;
chr(0x10FFFF) =~ /\p{All}/;
chr(0x110002) =~ /[\w\p{All}]/;
chr(0x110003) =~ /[\p{XPosixWord}]/;
chr(0x110004) =~ /[\P{XPosixWord}]/;
chr(0x110005) =~ /^[\p{Unassigned}]/;  # one of the only two statements EXPECT lists a warning for
chr(0x110006) =~ /^[\P{Unassigned}]/;  # the other statement EXPECT lists a warning for
# Only Unicode properties give non-Unicode warnings, and only those properties
# which do match above Unicode; and not when something else in the class
# matches above Unicode.  Below we test three ways where something outside the
# property may match non-Unicode: a code point above it, a class \S that we
# know at compile time doesn't, and a class \W whose values aren't (at the time
# of this writing) specified at compile time, but which wouldn't match
chr(0x110050) =~ /\w/;
chr(0x110051) =~ /\W/;
chr(0x110052) =~ /\d/;
chr(0x110053) =~ /\D/;
chr(0x110054) =~ /\s/;
chr(0x110055) =~ /\S/;
chr(0x110056) =~ /[[:word:]]/;
chr(0x110057) =~ /[[:^word:]]/;
chr(0x110058) =~ /[[:alnum:]]/;
chr(0x110059) =~ /[[:^alnum:]]/;
chr(0x11005A) =~ /[[:space:]]/;
chr(0x11005B) =~ /[[:^space:]]/;
chr(0x11005C) =~ /[[:digit:]]/;
chr(0x11005D) =~ /[[:^digit:]]/;
chr(0x11005E) =~ /[[:alpha:]]/;
chr(0x11005F) =~ /[[:^alpha:]]/;
chr(0x110060) =~ /[[:ascii:]]/;
chr(0x110061) =~ /[[:^ascii:]]/;
chr(0x110062) =~ /[[:cntrl:]]/;
chr(0x110063) =~ /[[:^cntrl:]]/;
chr(0x110064) =~ /[[:graph:]]/;
chr(0x110065) =~ /[[:^graph:]]/;
chr(0x110066) =~ /[[:lower:]]/;
chr(0x110067) =~ /[[:^lower:]]/;
chr(0x110068) =~ /[[:print:]]/;
chr(0x110069) =~ /[[:^print:]]/;
chr(0x11006A) =~ /[[:punct:]]/;
chr(0x11006B) =~ /[[:^punct:]]/;
chr(0x11006C) =~ /[[:upper:]]/;
chr(0x11006D) =~ /[[:^upper:]]/;
chr(0x11006E) =~ /[[:xdigit:]]/;
chr(0x11006F) =~ /[[:^xdigit:]]/;
chr(0x110070) =~ /[[:blank:]]/;
chr(0x110071) =~ /[[:^blank:]]/;
chr(0x111010) =~ /[\W\p{Unassigned}]/;
chr(0x111011) =~ /[\W\P{Unassigned}]/;
chr(0x112010) =~ /[\S\p{Unassigned}]/;
chr(0x112011) =~ /[\S\P{Unassigned}]/;
chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
no warnings 'utf8';  # every match is repeated below; EXPECT lists nothing for the repeats
chr(0xD7FF) =~ /\p{Any}/;
chr(0xD800) =~ /\p{Any}/;
chr(0xDFFF) =~ /\p{Any}/;
chr(0xE000) =~ /\p{Any}/;
chr(0xFEFF) =~ /\p{Any}/;
chr(0xFFFD) =~ /\p{Any}/;
chr(0xFFFE) =~ /\p{Any}/;
chr(0xFFFF) =~ /\p{Any}/;
chr(0x10000) =~ /\p{Any}/;
chr(0x100000) =~ /\p{Any}/;
chr(0x10FFFE) =~ /\p{Any}/;
chr(0x10FFFF) =~ /\p{Any}/;
chr(0x110000) =~ /[\p{Any}]/;
chr(0x110001) =~ /[\w\p{Any}]/;
chr(0x10FFFF) =~ /\p{All}/;
chr(0x110002) =~ /[\w\p{All}]/;
chr(0x110003) =~ /[\p{XPosixWord}]/;
chr(0x110004) =~ /[\P{XPosixWord}]/;
chr(0x110005) =~ /^[\p{Unassigned}]/;
chr(0x110006) =~ /^[\P{Unassigned}]/;
chr(0x110050) =~ /\w/;
chr(0x110051) =~ /\W/;
chr(0x110052) =~ /\d/;
chr(0x110053) =~ /\D/;
chr(0x110054) =~ /\s/;
chr(0x110055) =~ /\S/;
chr(0x110056) =~ /[[:word:]]/;
chr(0x110057) =~ /[[:^word:]]/;
chr(0x110058) =~ /[[:alnum:]]/;
chr(0x110059) =~ /[[:^alnum:]]/;
chr(0x11005A) =~ /[[:space:]]/;
chr(0x11005B) =~ /[[:^space:]]/;
chr(0x11005C) =~ /[[:digit:]]/;
chr(0x11005D) =~ /[[:^digit:]]/;
chr(0x11005E) =~ /[[:alpha:]]/;
chr(0x11005F) =~ /[[:^alpha:]]/;
chr(0x110060) =~ /[[:ascii:]]/;
chr(0x110061) =~ /[[:^ascii:]]/;
chr(0x110062) =~ /[[:cntrl:]]/;
chr(0x110063) =~ /[[:^cntrl:]]/;
chr(0x110064) =~ /[[:graph:]]/;
chr(0x110065) =~ /[[:^graph:]]/;
chr(0x110066) =~ /[[:lower:]]/;
chr(0x110067) =~ /[[:^lower:]]/;
chr(0x110068) =~ /[[:print:]]/;
chr(0x110069) =~ /[[:^print:]]/;
chr(0x11006A) =~ /[[:punct:]]/;
chr(0x11006B) =~ /[[:^punct:]]/;
chr(0x11006C) =~ /[[:upper:]]/;
chr(0x11006D) =~ /[[:^upper:]]/;
chr(0x11006E) =~ /[[:xdigit:]]/;
chr(0x11006F) =~ /[[:^xdigit:]]/;
chr(0x110070) =~ /[[:blank:]]/;
chr(0x110071) =~ /[[:^blank:]]/;
chr(0x111010) =~ /[\W\p{Unassigned}]/;
chr(0x111011) =~ /[\W\P{Unassigned}]/;
chr(0x112010) =~ /[\S\p{Unassigned}]/;
chr(0x112011) =~ /[\S\P{Unassigned}]/;
chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
EXPECT
Matched non-Unicode code point 0x110005 against Unicode property; may not be portable at - line 20.
Matched non-Unicode code point 0x110006 against Unicode property; may not be portable at - line 21.
########
# NAME Matching Unicode property against above-Unicode code point outputs a warning even if optimizer rejects the match (in synthetic start class)
# Now have to make FATAL to guarantee being output
use warnings FATAL => 'non_unicode';
"\x{110000}" =~ /b?\p{Space}/;
EXPECT
Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
########
# NAME Matching POSIX class property against above-Unicode code point doesn't output a warning
use warnings 'non_unicode';
use warnings FATAL => 'non_unicode';
"\x{110000}" =~ /b?[[:space:]]/;
EXPECT
########
# NAME \p{Any} against an above-Unicode code point gives no warning
use warnings 'utf8';
chr(0x110000) =~ /\p{Any}/;
EXPECT
########
# NAME utf8, non_unicode warnings categories work on Matched non-Unicode code point warning
use warnings qw(utf8 non_unicode);
chr(0x110000) =~ /^\p{Unassigned}/;  # both categories on: the only warning in EXPECT
no warnings 'non_unicode';
chr(0x110001) =~ /\p{Unassigned}/;  # 'non_unicode' off: nothing expected
use warnings 'non_unicode';
no warnings 'utf8';
chr(0x110002) =~ /\p{Unassigned}/;  # 'utf8' turned off last: nothing expected
EXPECT
Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 2.
########
# NAME optimizable regnode should still give non_unicode warnings when fatalized
use warnings 'utf8';
use warnings FATAL => 'non_unicode';
chr(0x110000) =~ /\p{lb=cr}/;
EXPECT
Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
########
# NAME optimizable regnode should not give non_unicode warnings when warnings are off
no warnings 'non_unicode';
chr(0x110000) =~ /\p{lb=cr}/;
EXPECT
########
# NAME 'All' matches above-Unicode without any warning
use warnings qw(utf8 non_unicode);
chr(0x110000) =~ /\p{All}/;
EXPECT
########
# NAME User-defined property that negates utf8::Any matches an above-Unicode code point
require "../test.pl";
use warnings 'utf8';
sub Is_Super { return '!utf8::Any' }
# The extra char is to avoid an optimization that avoids the problem when the
# property is the only non-latin1 char in a class
print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n";  # EXPECT is "1", i.e. the match succeeds
EXPECT
1
########
# NAME Printing surrogates, non-characters and above-Unicode code points to a :utf8 handle
require "../test.pl";
use warnings 'utf8';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D7FF}", "\n";
print $fh "\x{D800}", "\n";
print $fh "\x{D900}", "\n";
print $fh "\x{DA00}", "\n";
print $fh "\x{DB00}", "\n";
print $fh "\x{DC00}", "\n";
print $fh "\x{DD00}", "\n";
print $fh "\x{DE00}", "\n";
print $fh "\x{DF00}", "\n";
print $fh "\x{DFFF}", "\n";
print $fh "\x{E000}", "\n";
print $fh "\x{FDCF}", "\n";
print $fh "\x{FDD0}", "\n";
print $fh "\x{FDD1}", "\n";
print $fh "\x{FDEF}", "\n";
print $fh "\x{FDF0}", "\n";
print $fh "\x{FDFE}", "\n";
print $fh "\x{FDFF}", "\n";
print $fh "\x{FE00}", "\n";
print $fh "\x{FEFF}", "\n";
print $fh "\x{FFFD}", "\n";
print $fh "\x{FFFE}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{10000}", "\n";
print $fh "\x{1FFFD}", "\n";
print $fh "\x{1FFFE}", "\n";
print $fh "\x{1FFFF}", "\n";
print $fh "\x{20000}", "\n";
print $fh "\x{2FFFD}", "\n";
print $fh "\x{2FFFE}", "\n";
print $fh "\x{2FFFF}", "\n";
print $fh "\x{30000}", "\n";
print $fh "\x{3FFFD}", "\n";
print $fh "\x{3FFFE}", "\n";
print $fh "\x{3FFFF}", "\n";
print $fh "\x{40000}", "\n";
print $fh "\x{4FFFD}", "\n";
print $fh "\x{4FFFE}", "\n";
print $fh "\x{4FFFF}", "\n";
print $fh "\x{50000}", "\n";
print $fh "\x{5FFFD}", "\n";
print $fh "\x{5FFFE}", "\n";
print $fh "\x{5FFFF}", "\n";
print $fh "\x{60000}", "\n";
print $fh "\x{6FFFD}", "\n";
print $fh "\x{6FFFE}", "\n";
print $fh "\x{6FFFF}", "\n";
print $fh "\x{70000}", "\n";
print $fh "\x{7FFFD}", "\n";
print $fh "\x{7FFFE}", "\n";
print $fh "\x{7FFFF}", "\n";
print $fh "\x{80000}", "\n";
print $fh "\x{8FFFD}", "\n";
print $fh "\x{8FFFE}", "\n";
print $fh "\x{8FFFF}", "\n";
print $fh "\x{90000}", "\n";
print $fh "\x{9FFFD}", "\n";
print $fh "\x{9FFFE}", "\n";
print $fh "\x{9FFFF}", "\n";
print $fh "\x{A0000}", "\n";
print $fh "\x{AFFFD}", "\n";
print $fh "\x{AFFFE}", "\n";
print $fh "\x{AFFFF}", "\n";
print $fh "\x{B0000}", "\n";
print $fh "\x{BFFFD}", "\n";
print $fh "\x{BFFFE}", "\n";
print $fh "\x{BFFFF}", "\n";
print $fh "\x{C0000}", "\n";
print $fh "\x{CFFFD}", "\n";
print $fh "\x{CFFFE}", "\n";
print $fh "\x{CFFFF}", "\n";
print $fh "\x{D0000}", "\n";
print $fh "\x{DFFFD}", "\n";
print $fh "\x{DFFFE}", "\n";
print $fh "\x{DFFFF}", "\n";
print $fh "\x{E0000}", "\n";
print $fh "\x{EFFFD}", "\n";
print $fh "\x{EFFFE}", "\n";
print $fh "\x{EFFFF}", "\n";
print $fh "\x{F0000}", "\n";
print $fh "\x{FFFFD}", "\n";
print $fh "\x{FFFFE}", "\n";
print $fh "\x{FFFFF}", "\n";
print $fh "\x{100000}", "\n";
print $fh "\x{10FFFD}", "\n";
print $fh "\x{10FFFE}", "\n";
print $fh "\x{10FFFF}", "\n";
print $fh "\x{110000}", "\n";
print $fh "\x{11FFFD}", "\n";
print $fh "\x{11FFFE}", "\n";
print $fh "\x{11FFFF}", "\n";
print $fh "\x{120000}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Unicode surrogate U+D900 is illegal in UTF-8 at - line 7.
Unicode surrogate U+DA00 is illegal in UTF-8 at - line 8.
Unicode surrogate U+DB00 is illegal in UTF-8 at - line 9.
Unicode surrogate U+DC00 is illegal in UTF-8 at - line 10.
Unicode surrogate U+DD00 is illegal in UTF-8 at - line 11.
Unicode surrogate U+DE00 is illegal in UTF-8 at - line 12.
Unicode surrogate U+DF00 is illegal in UTF-8 at - line 13.
Unicode surrogate U+DFFF is illegal in UTF-8 at - line 14.
Unicode non-character U+FDD0 is not recommended for open interchange in print at - line 17.
Unicode non-character U+FDD1 is not recommended for open interchange in print at - line 18.
Unicode non-character U+FDEF is not recommended for open interchange in print at - line 19.
Unicode non-character U+FFFE is not recommended for open interchange in print at - line 26.
Unicode non-character U+FFFF is not recommended for open interchange in print at - line 27.
Unicode non-character U+1FFFE is not recommended for open interchange in print at - line 30.
Unicode non-character U+1FFFF is not recommended for open interchange in print at - line 31.
Unicode non-character U+2FFFE is not recommended for open interchange in print at - line 34.
Unicode non-character U+2FFFF is not recommended for open interchange in print at - line 35.
Unicode non-character U+3FFFE is not recommended for open interchange in print at - line 38.
Unicode non-character U+3FFFF is not recommended for open interchange in print at - line 39.
Unicode non-character U+4FFFE is not recommended for open interchange in print at - line 42.
Unicode non-character U+4FFFF is not recommended for open interchange in print at - line 43.
Unicode non-character U+5FFFE is not recommended for open interchange in print at - line 46.
Unicode non-character U+5FFFF is not recommended for open interchange in print at - line 47.
Unicode non-character U+6FFFE is not recommended for open interchange in print at - line 50.
Unicode non-character U+6FFFF is not recommended for open interchange in print at - line 51.
Unicode non-character U+7FFFE is not recommended for open interchange in print at - line 54.
Unicode non-character U+7FFFF is not recommended for open interchange in print at - line 55.
Unicode non-character U+8FFFE is not recommended for open interchange in print at - line 58.
Unicode non-character U+8FFFF is not recommended for open interchange in print at - line 59.
Unicode non-character U+9FFFE is not recommended for open interchange in print at - line 62.
Unicode non-character U+9FFFF is not recommended for open interchange in print at - line 63.
Unicode non-character U+AFFFE is not recommended for open interchange in print at - line 66.
Unicode non-character U+AFFFF is not recommended for open interchange in print at - line 67.
Unicode non-character U+BFFFE is not recommended for open interchange in print at - line 70.
Unicode non-character U+BFFFF is not recommended for open interchange in print at - line 71.
Unicode non-character U+CFFFE is not recommended for open interchange in print at - line 74.
Unicode non-character U+CFFFF is not recommended for open interchange in print at - line 75.
Unicode non-character U+DFFFE is not recommended for open interchange in print at - line 78.
Unicode non-character U+DFFFF is not recommended for open interchange in print at - line 79.
Unicode non-character U+EFFFE is not recommended for open interchange in print at - line 82.
Unicode non-character U+EFFFF is not recommended for open interchange in print at - line 83.
Unicode non-character U+FFFFE is not recommended for open interchange in print at - line 86.
Unicode non-character U+FFFFF is not recommended for open interchange in print at - line 87.
Unicode non-character U+10FFFE is not recommended for open interchange in print at - line 90.
Unicode non-character U+10FFFF is not recommended for open interchange in print at - line 91.
Code point 0x110000 is not Unicode, may not be portable in print at - line 92.
Code point 0x11FFFD is not Unicode, may not be portable in print at - line 93.
Code point 0x11FFFE is not Unicode, may not be portable in print at - line 94.
Code point 0x11FFFF is not Unicode, may not be portable in print at - line 95.
Code point 0x120000 is not Unicode, may not be portable in print at - line 96.
########
# NAME One print warning per category with all utf8 warnings enabled
require "../test.pl";
use warnings 'utf8';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";  # surrogate
print $fh "\x{FFFF}", "\n";  # non-character
print $fh "\x{110000}", "\n";  # non-Unicode code point
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
Unicode non-character U+FFFF is not recommended for open interchange in print at - line 6.
Code point 0x110000 is not Unicode, may not be portable in print at - line 7.
########
# NAME no warnings 'surrogate' silences only the surrogate print warning
require "../test.pl";
use warnings 'utf8';
no warnings 'surrogate';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";  # not reported
print $fh "\x{FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7.
Code point 0x110000 is not Unicode, may not be portable in print at - line 8.
########
# NAME no warnings 'nonchar' silences only the non-character print warning
require "../test.pl";
use warnings 'utf8';
no warnings 'nonchar';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";
print $fh "\x{FFFF}", "\n";  # not reported
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Code point 0x110000 is not Unicode, may not be portable in print at - line 8.
########
# NAME no warnings 'non_unicode' silences only the non-Unicode print warning
require "../test.pl";
use warnings 'utf8';
no warnings 'non_unicode';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{110000}", "\n";  # not reported
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7.
########
# NAME C<use warnings "nonchar"> works in isolation
require "../test.pl";
use warnings 'nonchar';  # the only warnings category this stanza enables
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{FFFF}", "\n";
close $fh;
EXPECT
Unicode non-character U+FFFF is not recommended for open interchange in print at - line 5.
########
# NAME C<use warnings "surrogate"> works in isolation
require "../test.pl";
use warnings 'surrogate';  # the only warnings category this stanza enables
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
########
# NAME C<use warnings "non_unicode"> works in isolation
require "../test.pl";
use warnings 'non_unicode';  # the only warnings category this stanza enables
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Code point 0x110000 is not Unicode, may not be portable in print at - line 5.
########
# NAME no warnings 'utf8' silences every print warning for problematic code points
require "../test.pl";
no warnings 'utf8';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D7FF}", "\n";
print $fh "\x{D800}", "\n";
print $fh "\x{DFFF}", "\n";
print $fh "\x{E000}", "\n";
print $fh "\x{FDCF}", "\n";
print $fh "\x{FDD0}", "\n";
print $fh "\x{FDEF}", "\n";
print $fh "\x{FDF0}", "\n";
print $fh "\x{FEFF}", "\n";
print $fh "\x{FFFD}", "\n";
print $fh "\x{FFFE}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{10000}", "\n";
print $fh "\x{1FFFE}", "\n";
print $fh "\x{1FFFF}", "\n";
print $fh "\x{2FFFE}", "\n";
print $fh "\x{2FFFF}", "\n";
print $fh "\x{3FFFE}", "\n";
print $fh "\x{3FFFF}", "\n";
print $fh "\x{4FFFE}", "\n";
print $fh "\x{4FFFF}", "\n";
print $fh "\x{5FFFE}", "\n";
print $fh "\x{5FFFF}", "\n";
print $fh "\x{6FFFE}", "\n";
print $fh "\x{6FFFF}", "\n";
print $fh "\x{7FFFE}", "\n";
print $fh "\x{7FFFF}", "\n";
print $fh "\x{8FFFE}", "\n";
print $fh "\x{8FFFF}", "\n";
print $fh "\x{9FFFE}", "\n";
print $fh "\x{9FFFF}", "\n";
print $fh "\x{AFFFE}", "\n";
print $fh "\x{AFFFF}", "\n";
print $fh "\x{BFFFE}", "\n";
print $fh "\x{BFFFF}", "\n";
print $fh "\x{CFFFE}", "\n";
print $fh "\x{CFFFF}", "\n";
print $fh "\x{DFFFE}", "\n";
print $fh "\x{DFFFF}", "\n";
print $fh "\x{EFFFE}", "\n";
print $fh "\x{EFFFF}", "\n";
print $fh "\x{FFFFE}", "\n";
print $fh "\x{FFFFF}", "\n";
print $fh "\x{100000}", "\n";
print $fh "\x{10FFFE}", "\n";
print $fh "\x{10FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
########
# NAME Case change crosses 255/256 under non-UTF8 locale
require '../loc_tools.pl';
unless (locales_enabled('LC_CTYPE')) {
    print("SKIPPED\n# locales not available\n"),exit;
}
eval { require POSIX; POSIX->import("locale_h") };
if ($@) {
    print("SKIPPED\n# no POSIX\n"),exit;
}
use warnings 'locale';
use feature 'fc';
use locale;
setlocale(&POSIX::LC_CTYPE, "C");  # the "C" locale serves as the non-UTF-8 locale here
my $a;
$a = lc("\x{178}");
$a = fc("\x{1E9E}");
$a = fc("\x{FB05}");
$a = uc("\x{FB00}");
$a = ucfirst("\x{149}");
$a = lcfirst("\x{178}");
no warnings 'locale';  # same operations again; EXPECT lists nothing for them
$a = lc("\x{178}");
$a = fc("\x{1E9E}");
$a = fc("\x{FB05}");
$a = uc("\x{FB00}");
$a = ucfirst("\x{149}");
$a = lcfirst("\x{178}");
EXPECT
Can't do lc("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 14.
Can't do fc("\x{1E9E}") on non-UTF-8 locale; resolved to "\x{17F}\x{17F}". at - line 15.
Can't do fc("\x{FB05}") on non-UTF-8 locale; resolved to "\x{FB06}". at - line 16.
Can't do uc("\x{FB00}") on non-UTF-8 locale; resolved to "\x{FB00}". at - line 17.
Can't do ucfirst("\x{149}") on non-UTF-8 locale; resolved to "\x{149}". at - line 18.
Can't do lcfirst("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 19.
########
# NAME Wide character in non-UTF-8 locale
require '../loc_tools.pl';
unless (locales_enabled('LC_CTYPE')) {
    print("SKIPPED\n# locales not available\n"),exit;
}
eval { require POSIX; POSIX->import("locale_h") };
if ($@) {
    print("SKIPPED\n# no POSIX\n"),exit;
}
use warnings 'locale';
use feature 'fc';
use locale;
setlocale(&POSIX::LC_CTYPE, "C");  # the "C" locale serves as the non-UTF-8 locale here
my $a;
$a = lc("\x{100}");
$a = lcfirst("\x{101}");
$a = fc("\x{102}");
$a = uc("\x{103}");
$a = ucfirst("\x{104}");
no warnings 'locale';  # same operations again; EXPECT lists nothing for them
$a = lc("\x{100}");
$a = lcfirst("\x{101}");
$a = fc("\x{102}");
$a = uc("\x{103}");
$a = ucfirst("\x{104}");
EXPECT
Wide character (U+100) in lc at - line 14.
Wide character (U+101) in lcfirst at - line 15.
Wide character (U+102) in fc at - line 16.
Wide character (U+103) in uc at - line 17.
Wide character (U+104) in ucfirst at - line 18.
########
# NAME Wide character in UTF-8 locale
require '../loc_tools.pl';
unless (locales_enabled('LC_CTYPE')) {
    print("SKIPPED\n# locales not available\n"),exit;
}
eval { require POSIX; POSIX->import("locale_h") };
if ($@) {
    print("SKIPPED\n# no POSIX\n"),exit;
}
my @utf8_locales = find_utf8_ctype_locale();
unless (@utf8_locales) {
    print("SKIPPED\n# no UTF-8 locales\n"),exit;
}
use warnings 'locale';
use feature 'fc';
use locale;
setlocale(&POSIX::LC_CTYPE, $utf8_locales[0]);  # first locale returned by find_utf8_ctype_locale()
my $a;
$a = lc("\x{100}");
$a = lcfirst("\x{101}");
$a = fc("\x{102}");
$a = uc("\x{103}");
$a = ucfirst("\x{104}");
EXPECT
########
# NAME Deprecation of too-large code points
require "../test.pl";
use warnings 'non_unicode';
my $max_cp = ~0 >> 1;  # all bits set, shifted right by one
my $max_char = chr $max_cp;
my $to_warn_cp = $max_cp + 1;  # one past $max_cp; EXPECT shows deprecation messages for it
my $to_warn_char = chr $to_warn_cp;
$max_char =~ /[\x{110000}\P{Unassigned}]/;
$to_warn_char =~ /[\x{110000}\P{Unassigned}]/;
my $temp = qr/$max_char/;
$temp = qr/$to_warn_char/;
$temp = uc($max_char);
$temp = uc($to_warn_char);
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh $max_char, "\n";
print $fh $to_warn_char, "\n";
close $fh;
EXPECT
OPTION regex
Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ at - line \d+.
Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in pattern match \(m//\) at - line \d+.
Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in regexp compilation at - line \d+.
Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in regexp compilation at - line \d+.
Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ at - line \d+.
Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in regexp compilation at - line \d+.
Operation "uc" returns its argument for non-Unicode code point 0x7F+ at - line \d+.
Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ at - line \d+.
Operation "uc" returns its argument for non-Unicode code point 0x80+ at - line \d+.
Code point 0x7F+ is not Unicode, may not be portable in print at - line \d+.
Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in print at - line \d+.
########
# NAME [perl #127262]
BEGIN{  # NOTE(review): the stanza's last code line must end in the raw byte 0xC2 directly followed by a newline (see EXPECT); confirm it was not re-encoded
    if (ord('A') == 193) {
        print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings.";
        exit 0;
    }
{};$^H=2**400}�
EXPECT
Malformed UTF-8 character (unexpected non-continuation byte 0x0a, immediately after start byte 0xc2) at - line 6.