1 2 utf8.c AOK 3 4 [utf8_to_uvchr_buf] 5 Malformed UTF-8 character 6 my $a = ord "\x80" ; 7 8 Malformed UTF-8 character 9 my $a = ord "\xf080" ; 10 <<<<<< this warning can't be easily triggered from perl anymore 11 12 [utf16_to_utf8] 13 Malformed UTF-16 surrogate 14 <<<<<< Add a test when something actually calls utf16_to_utf8 15 16__END__ 17# utf8.c [utf8_to_uvchr_buf] -W 18BEGIN { 19 if (ord('A') == 193) { 20 print "SKIPPED\n# ebcdic platforms do not generate Malformed UTF-8 warnings."; 21 exit 0; 22 } 23} 24use utf8 ; 25my $a = "sn�storm" ; 26{ 27 no warnings 'utf8' ; 28 my $a = "sn�storm"; 29 use warnings 'utf8' ; 30 my $a = "sn�storm"; 31} 32EXPECT 33Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9. 34Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14. 35######## 36use warnings 'utf8'; 37my $d7ff = uc(chr(0xD7FF)); 38my $d800 = uc(chr(0xD800)); 39my $dfff = uc(chr(0xDFFF)); 40my $e000 = uc(chr(0xE000)); 41my $feff = uc(chr(0xFEFF)); 42my $fffd = uc(chr(0xFFFD)); 43my $fffe = uc(chr(0xFFFE)); 44my $ffff = uc(chr(0xFFFF)); 45my $hex4 = uc(chr(0x10000)); 46my $hex5 = uc(chr(0x100000)); 47my $maxm1 = uc(chr(0x10FFFE)); 48my $max = uc(chr(0x10FFFF)); 49my $nonUnicode = uc(chr(0x110000)); 50no warnings 'utf8'; 51my $d7ff = uc(chr(0xD7FF)); 52my $d800 = uc(chr(0xD800)); 53my $dfff = uc(chr(0xDFFF)); 54my $e000 = uc(chr(0xE000)); 55my $feff = uc(chr(0xFEFF)); 56my $fffd = uc(chr(0xFFFD)); 57my $fffe = uc(chr(0xFFFE)); 58my $ffff = uc(chr(0xFFFF)); 59my $hex4 = uc(chr(0x10000)); 60my $hex5 = uc(chr(0x100000)); 61my $maxm1 = uc(chr(0x10FFFE)); 62my $max = uc(chr(0x10FFFF)); 63my $nonUnicode = uc(chr(0x110000)); 64EXPECT 65Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3. 66Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4. 
67Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14. 68######## 69use warnings 'utf8'; 70my $d800 = uc(chr(0xD800)); 71my $nonUnicode = uc(chr(0x110000)); 72no warnings 'surrogate'; 73my $d800 = uc(chr(0xD800)); 74my $nonUnicode = uc(chr(0x110000)); 75EXPECT 76Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2. 77Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3. 78Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6. 79######## 80use warnings 'utf8'; 81my $d800 = uc(chr(0xD800)); 82my $nonUnicode = uc(chr(0x110000)); 83my $big_nonUnicode = uc(chr(0x8000_0000)); 84no warnings 'non_unicode'; 85my $d800 = uc(chr(0xD800)); 86my $nonUnicode = uc(chr(0x110000)); 87my $big_nonUnicode = uc(chr(0x8000_0000)); 88EXPECT 89Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2. 90Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3. 91Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 4. 92Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 6. 
93######## 94use warnings 'utf8'; 95my $d7ff = lc pack("U", 0xD7FF); 96my $d800 = lc pack("U", 0xD800); 97my $dfff = lc pack("U", 0xDFFF); 98my $e000 = lc pack("U", 0xE000); 99my $feff = lc pack("U", 0xFEFF); 100my $fffd = lc pack("U", 0xFFFD); 101my $fffe = lc pack("U", 0xFFFE); 102my $ffff = lc pack("U", 0xFFFF); 103my $hex4 = lc pack("U", 0x10000); 104my $hex5 = lc pack("U", 0x100000); 105my $maxm1 = lc pack("U", 0x10FFFE); 106my $max = lc pack("U", 0x10FFFF); 107my $nonUnicode = lc(pack("U", 0x110000)); 108no warnings 'utf8'; 109my $d7ff = lc pack("U", 0xD7FF); 110my $d800 = lc pack("U", 0xD800); 111my $dfff = lc pack("U", 0xDFFF); 112my $e000 = lc pack("U", 0xE000); 113my $feff = lc pack("U", 0xFEFF); 114my $fffd = lc pack("U", 0xFFFD); 115my $fffe = lc pack("U", 0xFFFE); 116my $ffff = lc pack("U", 0xFFFF); 117my $hex4 = lc pack("U", 0x10000); 118my $hex5 = lc pack("U", 0x100000); 119my $maxm1 = lc pack("U", 0x10FFFE); 120my $max = lc pack("U", 0x10FFFF); 121my $nonUnicode = lc(pack("U", 0x110000)); 122EXPECT 123Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3. 124Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4. 125Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14. 
126######## 127use warnings 'utf8'; 128my $d7ff = ucfirst "\x{D7FF}"; 129my $d800 = ucfirst "\x{D800}"; 130my $dfff = ucfirst "\x{DFFF}"; 131my $e000 = ucfirst "\x{E000}"; 132my $feff = ucfirst "\x{FEFF}"; 133my $fffd = ucfirst "\x{FFFD}"; 134my $fffe = ucfirst "\x{FFFE}"; 135my $ffff = ucfirst "\x{FFFF}"; 136my $hex4 = ucfirst "\x{10000}"; 137my $hex5 = ucfirst "\x{100000}"; 138my $maxm1 = ucfirst "\x{10FFFE}"; 139my $max = ucfirst "\x{10FFFF}"; 140my $nonUnicode = ucfirst "\x{110000}"; 141no warnings 'utf8'; 142my $d7ff = ucfirst "\x{D7FF}"; 143my $d800 = ucfirst "\x{D800}"; 144my $dfff = ucfirst "\x{DFFF}"; 145my $e000 = ucfirst "\x{E000}"; 146my $feff = ucfirst "\x{FEFF}"; 147my $fffd = ucfirst "\x{FFFD}"; 148my $fffe = ucfirst "\x{FFFE}"; 149my $ffff = ucfirst "\x{FFFF}"; 150my $hex4 = ucfirst "\x{10000}"; 151my $hex5 = ucfirst "\x{100000}"; 152my $maxm1 = ucfirst "\x{10FFFE}"; 153my $max = ucfirst "\x{10FFFF}"; 154my $nonUnicode = ucfirst "\x{110000}"; 155EXPECT 156Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3. 157Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4. 158Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14. 
159######## 160use warnings 'utf8'; 161chr(0xD7FF) =~ /\p{Any}/; 162chr(0xD800) =~ /\p{Any}/; 163chr(0xDFFF) =~ /\p{Any}/; 164chr(0xE000) =~ /\p{Any}/; 165chr(0xFEFF) =~ /\p{Any}/; 166chr(0xFFFD) =~ /\p{Any}/; 167chr(0xFFFE) =~ /\p{Any}/; 168chr(0xFFFF) =~ /\p{Any}/; 169chr(0x10000) =~ /\p{Any}/; 170chr(0x100000) =~ /\p{Any}/; 171chr(0x10FFFE) =~ /\p{Any}/; 172chr(0x10FFFF) =~ /\p{Any}/; 173chr(0x110000) =~ /[\w\p{Any}]/; 174chr(0x110010) =~ /[\w\p{PosixWord}]/; 175chr(0x110011) =~ /[\w\P{PosixWord}]/; 176chr(0x110012) =~ /[\w\p{XPosixWord}]/; 177chr(0x110013) =~ /[\w\P{XPosixWord}]/; 178chr(0x110014) =~ /[\w\p{PosixAlnum}]/; 179chr(0x110015) =~ /[\w\P{PosixAlnum}]/; 180chr(0x110016) =~ /[\w\p{XPosixAlnum}]/; 181chr(0x110017) =~ /[\w\P{XPosixAlnum}]/; 182chr(0x110018) =~ /[\w\p{PosixSpace}]/; 183chr(0x110019) =~ /[\w\P{PosixSpace}]/; 184chr(0x11001A) =~ /[\w\p{XPosixSpace}]/; 185chr(0x11001B) =~ /[\w\P{XPosixSpace}]/; 186chr(0x11001C) =~ /[\w\p{PosixDigit}]/; 187chr(0x11001D) =~ /[\w\P{PosixDigit}]/; 188chr(0x11001E) =~ /[\w\p{XPosixDigit}]/; 189chr(0x11001F) =~ /[\w\P{XPosixDigit}]/; 190chr(0x110020) =~ /[\w\p{PosixAlpha}]/; 191chr(0x110021) =~ /[\w\P{PosixAlpha}]/; 192chr(0x110022) =~ /[\w\p{XPosixAlpha}]/; 193chr(0x110023) =~ /[\w\P{XPosixAlpha}]/; 194chr(0x110024) =~ /[\w\p{Ascii}]/; 195chr(0x110025) =~ /[\w\P{Ascii}]/; 196chr(0x110026) =~ /[\w\p{PosixCntrl}]/; 197chr(0x110027) =~ /[\w\P{PosixCntrl}]/; 198chr(0x110028) =~ /[\w\p{XPosixCntrl}]/; 199chr(0x110029) =~ /[\w\P{XPosixCntrl}]/; 200chr(0x11002A) =~ /[\w\p{PosixGraph}]/; 201chr(0x11002B) =~ /[\w\P{PosixGraph}]/; 202chr(0x11002C) =~ /[\w\p{XPosixGraph}]/; 203chr(0x11002D) =~ /[\w\P{XPosixGraph}]/; 204chr(0x11002E) =~ /[\w\p{PosixLower}]/; 205chr(0x11002F) =~ /[\w\P{PosixLower}]/; 206chr(0x110030) =~ /[\w\p{XPosixLower}]/; 207chr(0x110031) =~ /[\w\P{XPosixLower}]/; 208chr(0x110032) =~ /[\w\p{PosixPrint}]/; 209chr(0x110033) =~ /[\w\P{PosixPrint}]/; 210chr(0x110034) =~ /[\w\p{XPosixPrint}]/; 211chr(0x110035) 
=~ /[\w\P{XPosixPrint}]/; 212chr(0x110036) =~ /[\w\p{PosixPunct}]/; 213chr(0x110037) =~ /[\w\P{PosixPunct}]/; 214chr(0x110038) =~ /[\w\p{XPosixPunct}]/; 215chr(0x110039) =~ /[\w\P{XPosixPunct}]/; 216chr(0x11003A) =~ /[\w\p{PosixUpper}]/; 217chr(0x11003B) =~ /[\w\P{PosixUpper}]/; 218chr(0x11003C) =~ /[\w\p{XPosixUpper}]/; 219chr(0x11003D) =~ /[\w\P{XPosixUpper}]/; 220chr(0x11003E) =~ /[\w\p{PosixXdigit}]/; 221chr(0x11003F) =~ /[\w\P{PosixXdigit}]/; 222chr(0x110040) =~ /[\w\p{XPosixXdigit}]/; 223chr(0x110041) =~ /[\w\P{XPosixXdigit}]/; 224chr(0x110042) =~ /[\w\p{PerlSpace}]/; 225chr(0x110043) =~ /[\w\P{PerlSpace}]/; 226chr(0x110044) =~ /[\w\p{XPerlSpace}]/; 227chr(0x110045) =~ /[\w\P{XPerlSpace}]/; 228chr(0x110046) =~ /[\w\p{PosixBlank}]/; 229chr(0x110047) =~ /[\w\P{PosixBlank}]/; 230chr(0x110048) =~ /[\w\p{XPosixBlank}]/; 231chr(0x110049) =~ /[\w\P{XPosixBlank}]/; 232# Currently some warnings from the above are output twice 233# Only Unicode properties give non-Unicode warnings, and not when something 234# else in the class matches above Unicode. 
Below we test three ways where 235# something outside the property may match non-Unicode: a code point above it, 236# a class \S that we know at compile time doesn't, and a class \W whose values 237# aren't (at the time of this writing) specified at compile time, but which 238# wouldn't match 239chr(0x110050) =~ /\w/; 240chr(0x110051) =~ /\W/; 241chr(0x110052) =~ /\d/; 242chr(0x110053) =~ /\D/; 243chr(0x110054) =~ /\s/; 244chr(0x110055) =~ /\S/; 245chr(0x110056) =~ /[[:word:]]/; 246chr(0x110057) =~ /[[:^word:]]/; 247chr(0x110058) =~ /[[:alnum:]]/; 248chr(0x110059) =~ /[[:^alnum:]]/; 249chr(0x11005A) =~ /[[:space:]]/; 250chr(0x11005B) =~ /[[:^space:]]/; 251chr(0x11005C) =~ /[[:digit:]]/; 252chr(0x11005D) =~ /[[:^digit:]]/; 253chr(0x11005E) =~ /[[:alpha:]]/; 254chr(0x11005F) =~ /[[:^alpha:]]/; 255chr(0x110060) =~ /[[:ascii:]]/; 256chr(0x110061) =~ /[[:^ascii:]]/; 257chr(0x110062) =~ /[[:cntrl:]]/; 258chr(0x110063) =~ /[[:^cntrl:]]/; 259chr(0x110064) =~ /[[:graph:]]/; 260chr(0x110065) =~ /[[:^graph:]]/; 261chr(0x110066) =~ /[[:lower:]]/; 262chr(0x110067) =~ /[[:^lower:]]/; 263chr(0x110068) =~ /[[:print:]]/; 264chr(0x110069) =~ /[[:^print:]]/; 265chr(0x11006A) =~ /[[:punct:]]/; 266chr(0x11006B) =~ /[[:^punct:]]/; 267chr(0x11006C) =~ /[[:upper:]]/; 268chr(0x11006D) =~ /[[:^upper:]]/; 269chr(0x11006E) =~ /[[:xdigit:]]/; 270chr(0x11006F) =~ /[[:^xdigit:]]/; 271chr(0x110070) =~ /[[:blank:]]/; 272chr(0x110071) =~ /[[:^blank:]]/; 273chr(0x111000) =~ /[\W\p{Any}]/; 274chr(0x111010) =~ /[\W\p{PosixWord}]/; 275chr(0x111011) =~ /[\W\P{PosixWord}]/; 276chr(0x111012) =~ /[\W\p{XPosixWord}]/; 277chr(0x111013) =~ /[\W\P{XPosixWord}]/; 278chr(0x111014) =~ /[\W\p{PosixAlnum}]/; 279chr(0x111015) =~ /[\W\P{PosixAlnum}]/; 280chr(0x111016) =~ /[\W\p{XPosixAlnum}]/; 281chr(0x111017) =~ /[\W\P{XPosixAlnum}]/; 282chr(0x111018) =~ /[\W\p{PosixSpace}]/; 283chr(0x111019) =~ /[\W\P{PosixSpace}]/; 284chr(0x11101A) =~ /[\W\p{XPosixSpace}]/; 285chr(0x11101B) =~ /[\W\P{XPosixSpace}]/; 
286chr(0x11101C) =~ /[\W\p{PosixDigit}]/; 287chr(0x11101D) =~ /[\W\P{PosixDigit}]/; 288chr(0x11101E) =~ /[\W\p{XPosixDigit}]/; 289chr(0x11101F) =~ /[\W\P{XPosixDigit}]/; 290chr(0x111020) =~ /[\W\p{PosixAlpha}]/; 291chr(0x111021) =~ /[\W\P{PosixAlpha}]/; 292chr(0x111022) =~ /[\W\p{XPosixAlpha}]/; 293chr(0x111023) =~ /[\W\P{XPosixAlpha}]/; 294chr(0x111024) =~ /[\W\p{Ascii}]/; 295chr(0x111025) =~ /[\W\P{Ascii}]/; 296chr(0x111026) =~ /[\W\p{PosixCntrl}]/; 297chr(0x111027) =~ /[\W\P{PosixCntrl}]/; 298chr(0x111028) =~ /[\W\p{XPosixCntrl}]/; 299chr(0x111029) =~ /[\W\P{XPosixCntrl}]/; 300chr(0x11102A) =~ /[\W\p{PosixGraph}]/; 301chr(0x11102B) =~ /[\W\P{PosixGraph}]/; 302chr(0x11102C) =~ /[\W\p{XPosixGraph}]/; 303chr(0x11102D) =~ /[\W\P{XPosixGraph}]/; 304chr(0x11102E) =~ /[\W\p{PosixLower}]/; 305chr(0x11102F) =~ /[\W\P{PosixLower}]/; 306chr(0x111030) =~ /[\W\p{XPosixLower}]/; 307chr(0x111031) =~ /[\W\P{XPosixLower}]/; 308chr(0x111032) =~ /[\W\p{PosixPrint}]/; 309chr(0x111033) =~ /[\W\P{PosixPrint}]/; 310chr(0x111034) =~ /[\W\p{XPosixPrint}]/; 311chr(0x111035) =~ /[\W\P{XPosixPrint}]/; 312chr(0x111036) =~ /[\W\p{PosixPunct}]/; 313chr(0x111037) =~ /[\W\P{PosixPunct}]/; 314chr(0x111038) =~ /[\W\p{XPosixPunct}]/; 315chr(0x111039) =~ /[\W\P{XPosixPunct}]/; 316chr(0x11103A) =~ /[\W\p{PosixUpper}]/; 317chr(0x11103B) =~ /[\W\P{PosixUpper}]/; 318chr(0x11103C) =~ /[\W\p{XPosixUpper}]/; 319chr(0x11103D) =~ /[\W\P{XPosixUpper}]/; 320chr(0x11103E) =~ /[\W\p{PosixXdigit}]/; 321chr(0x11103F) =~ /[\W\P{PosixXdigit}]/; 322chr(0x111040) =~ /[\W\p{XPosixXdigit}]/; 323chr(0x111041) =~ /[\W\P{XPosixXdigit}]/; 324chr(0x111042) =~ /[\W\p{PerlSpace}]/; 325chr(0x111043) =~ /[\W\P{PerlSpace}]/; 326chr(0x111044) =~ /[\W\p{XPerlSpace}]/; 327chr(0x111045) =~ /[\W\P{XPerlSpace}]/; 328chr(0x111046) =~ /[\W\p{PosixBlank}]/; 329chr(0x111047) =~ /[\W\P{PosixBlank}]/; 330chr(0x111048) =~ /[\W\p{XPosixBlank}]/; 331chr(0x111049) =~ /[\W\P{XPosixBlank}]/; 332chr(0x112000) =~ /[\S\p{Any}]/; 333chr(0x112010) =~ 
/[\S\p{PosixWord}]/; 334chr(0x112011) =~ /[\S\P{PosixWord}]/; 335chr(0x112012) =~ /[\S\p{XPosixWord}]/; 336chr(0x112013) =~ /[\S\P{XPosixWord}]/; 337chr(0x112014) =~ /[\S\p{PosixAlnum}]/; 338chr(0x112015) =~ /[\S\P{PosixAlnum}]/; 339chr(0x112016) =~ /[\S\p{XPosixAlnum}]/; 340chr(0x112017) =~ /[\S\P{XPosixAlnum}]/; 341chr(0x112018) =~ /[\S\p{PosixSpace}]/; 342chr(0x112019) =~ /[\S\P{PosixSpace}]/; 343chr(0x11201A) =~ /[\S\p{XPosixSpace}]/; 344chr(0x11201B) =~ /[\S\P{XPosixSpace}]/; 345chr(0x11201C) =~ /[\S\p{PosixDigit}]/; 346chr(0x11201D) =~ /[\S\P{PosixDigit}]/; 347chr(0x11201E) =~ /[\S\p{XPosixDigit}]/; 348chr(0x11201F) =~ /[\S\P{XPosixDigit}]/; 349chr(0x112020) =~ /[\S\p{PosixAlpha}]/; 350chr(0x112021) =~ /[\S\P{PosixAlpha}]/; 351chr(0x112022) =~ /[\S\p{XPosixAlpha}]/; 352chr(0x112023) =~ /[\S\P{XPosixAlpha}]/; 353chr(0x112024) =~ /[\S\p{Ascii}]/; 354chr(0x112025) =~ /[\S\P{Ascii}]/; 355chr(0x112026) =~ /[\S\p{PosixCntrl}]/; 356chr(0x112027) =~ /[\S\P{PosixCntrl}]/; 357chr(0x112028) =~ /[\S\p{XPosixCntrl}]/; 358chr(0x112029) =~ /[\S\P{XPosixCntrl}]/; 359chr(0x11202A) =~ /[\S\p{PosixGraph}]/; 360chr(0x11202B) =~ /[\S\P{PosixGraph}]/; 361chr(0x11202C) =~ /[\S\p{XPosixGraph}]/; 362chr(0x11202D) =~ /[\S\P{XPosixGraph}]/; 363chr(0x11202E) =~ /[\S\p{PosixLower}]/; 364chr(0x11202F) =~ /[\S\P{PosixLower}]/; 365chr(0x112030) =~ /[\S\p{XPosixLower}]/; 366chr(0x112031) =~ /[\S\P{XPosixLower}]/; 367chr(0x112032) =~ /[\S\p{PosixPrint}]/; 368chr(0x112033) =~ /[\S\P{PosixPrint}]/; 369chr(0x112034) =~ /[\S\p{XPosixPrint}]/; 370chr(0x112035) =~ /[\S\P{XPosixPrint}]/; 371chr(0x112036) =~ /[\S\p{PosixPunct}]/; 372chr(0x112037) =~ /[\S\P{PosixPunct}]/; 373chr(0x112038) =~ /[\S\p{XPosixPunct}]/; 374chr(0x112039) =~ /[\S\P{XPosixPunct}]/; 375chr(0x11203A) =~ /[\S\p{PosixUpper}]/; 376chr(0x11203B) =~ /[\S\P{PosixUpper}]/; 377chr(0x11203C) =~ /[\S\p{XPosixUpper}]/; 378chr(0x11203D) =~ /[\S\P{XPosixUpper}]/; 379chr(0x11203E) =~ /[\S\p{PosixXdigit}]/; 380chr(0x11203F) =~ 
/[\S\P{PosixXdigit}]/; 381chr(0x112040) =~ /[\S\p{XPosixXdigit}]/; 382chr(0x112041) =~ /[\S\P{XPosixXdigit}]/; 383chr(0x112042) =~ /[\S\p{PerlSpace}]/; 384chr(0x112043) =~ /[\S\P{PerlSpace}]/; 385chr(0x112044) =~ /[\S\p{XPerlSpace}]/; 386chr(0x112045) =~ /[\S\P{XPerlSpace}]/; 387chr(0x112046) =~ /[\S\p{PosixBlank}]/; 388chr(0x112047) =~ /[\S\P{PosixBlank}]/; 389chr(0x112048) =~ /[\S\p{XPosixBlank}]/; 390chr(0x112049) =~ /[\S\P{XPosixBlank}]/; 391chr(0x113000) =~ /[\x{110000}\p{Any}]/; 392chr(0x113010) =~ /[\x{110000}\p{PosixWord}]/; 393chr(0x113011) =~ /[\x{110000}\P{PosixWord}]/; 394chr(0x113012) =~ /[\x{110000}\p{XPosixWord}]/; 395chr(0x113013) =~ /[\x{110000}\P{XPosixWord}]/; 396chr(0x113014) =~ /[\x{110000}\p{PosixAlnum}]/; 397chr(0x113015) =~ /[\x{110000}\P{PosixAlnum}]/; 398chr(0x113016) =~ /[\x{110000}\p{XPosixAlnum}]/; 399chr(0x113017) =~ /[\x{110000}\P{XPosixAlnum}]/; 400chr(0x113018) =~ /[\x{110000}\p{PosixSpace}]/; 401chr(0x113019) =~ /[\x{110000}\P{PosixSpace}]/; 402chr(0x11301A) =~ /[\x{110000}\p{XPosixSpace}]/; 403chr(0x11301B) =~ /[\x{110000}\P{XPosixSpace}]/; 404chr(0x11301C) =~ /[\x{110000}\p{PosixDigit}]/; 405chr(0x11301D) =~ /[\x{110000}\P{PosixDigit}]/; 406chr(0x11301E) =~ /[\x{110000}\p{XPosixDigit}]/; 407chr(0x11301F) =~ /[\x{110000}\P{XPosixDigit}]/; 408chr(0x113020) =~ /[\x{110000}\p{PosixAlpha}]/; 409chr(0x113021) =~ /[\x{110000}\P{PosixAlpha}]/; 410chr(0x113022) =~ /[\x{110000}\p{XPosixAlpha}]/; 411chr(0x113023) =~ /[\x{110000}\P{XPosixAlpha}]/; 412chr(0x113024) =~ /[\x{110000}\p{Ascii}]/; 413chr(0x113025) =~ /[\x{110000}\P{Ascii}]/; 414chr(0x113026) =~ /[\x{110000}\p{PosixCntrl}]/; 415chr(0x113027) =~ /[\x{110000}\P{PosixCntrl}]/; 416chr(0x113028) =~ /[\x{110000}\p{XPosixCntrl}]/; 417chr(0x113029) =~ /[\x{110000}\P{XPosixCntrl}]/; 418chr(0x11302A) =~ /[\x{110000}\p{PosixGraph}]/; 419chr(0x11302B) =~ /[\x{110000}\P{PosixGraph}]/; 420chr(0x11302C) =~ /[\x{110000}\p{XPosixGraph}]/; 421chr(0x11302D) =~ /[\x{110000}\P{XPosixGraph}]/; 
422chr(0x11302E) =~ /[\x{110000}\p{PosixLower}]/; 423chr(0x11302F) =~ /[\x{110000}\P{PosixLower}]/; 424chr(0x113030) =~ /[\x{110000}\p{XPosixLower}]/; 425chr(0x113031) =~ /[\x{110000}\P{XPosixLower}]/; 426chr(0x113032) =~ /[\x{110000}\p{PosixPrint}]/; 427chr(0x113033) =~ /[\x{110000}\P{PosixPrint}]/; 428chr(0x113034) =~ /[\x{110000}\p{XPosixPrint}]/; 429chr(0x113035) =~ /[\x{110000}\P{XPosixPrint}]/; 430chr(0x113036) =~ /[\x{110000}\p{PosixPunct}]/; 431chr(0x113037) =~ /[\x{110000}\P{PosixPunct}]/; 432chr(0x113038) =~ /[\x{110000}\p{XPosixPunct}]/; 433chr(0x113039) =~ /[\x{110000}\P{XPosixPunct}]/; 434chr(0x11303A) =~ /[\x{110000}\p{PosixUpper}]/; 435chr(0x11303B) =~ /[\x{110000}\P{PosixUpper}]/; 436chr(0x11303C) =~ /[\x{110000}\p{XPosixUpper}]/; 437chr(0x11303D) =~ /[\x{110000}\P{XPosixUpper}]/; 438chr(0x11303E) =~ /[\x{110000}\p{PosixXdigit}]/; 439chr(0x11303F) =~ /[\x{110000}\P{PosixXdigit}]/; 440chr(0x113040) =~ /[\x{110000}\p{XPosixXdigit}]/; 441chr(0x113041) =~ /[\x{110000}\P{XPosixXdigit}]/; 442chr(0x113042) =~ /[\x{110000}\p{PerlSpace}]/; 443chr(0x113043) =~ /[\x{110000}\P{PerlSpace}]/; 444chr(0x113044) =~ /[\x{110000}\p{XPerlSpace}]/; 445chr(0x113045) =~ /[\x{110000}\P{XPerlSpace}]/; 446chr(0x113046) =~ /[\x{110000}\p{PosixBlank}]/; 447chr(0x113047) =~ /[\x{110000}\P{PosixBlank}]/; 448chr(0x113048) =~ /[\x{110000}\p{XPosixBlank}]/; 449chr(0x113049) =~ /[\x{110000}\P{XPosixBlank}]/; 450no warnings 'utf8'; 451chr(0xD7FF) =~ /\p{Any}/; 452chr(0xD800) =~ /\p{Any}/; 453chr(0xDFFF) =~ /\p{Any}/; 454chr(0xE000) =~ /\p{Any}/; 455chr(0xFEFF) =~ /\p{Any}/; 456chr(0xFFFD) =~ /\p{Any}/; 457chr(0xFFFE) =~ /\p{Any}/; 458chr(0xFFFF) =~ /\p{Any}/; 459chr(0x10000) =~ /\p{Any}/; 460chr(0x100000) =~ /\p{Any}/; 461chr(0x10FFFE) =~ /\p{Any}/; 462chr(0x10FFFF) =~ /\p{Any}/; 463chr(0x110000) =~ /\p{Any}/; 464chr(0x110010) =~ /\p{PosixWord}/; 465chr(0x110011) =~ /\P{PosixWord}/; 466chr(0x110012) =~ /\p{XPosixWord}/; 467chr(0x110013) =~ /\P{XPosixWord}/; 468chr(0x110014) =~ 
/\p{PosixAlnum}/; 469chr(0x110015) =~ /\P{PosixAlnum}/; 470chr(0x110016) =~ /\p{XPosixAlnum}/; 471chr(0x110017) =~ /\P{XPosixAlnum}/; 472chr(0x110018) =~ /\p{PosixSpace}/; 473chr(0x110019) =~ /\P{PosixSpace}/; 474chr(0x11001A) =~ /\p{XPosixSpace}/; 475chr(0x11001B) =~ /\P{XPosixSpace}/; 476chr(0x11001C) =~ /\p{PosixDigit}/; 477chr(0x11001D) =~ /\P{PosixDigit}/; 478chr(0x11001E) =~ /\p{XPosixDigit}/; 479chr(0x11001F) =~ /\P{XPosixDigit}/; 480chr(0x110020) =~ /\p{PosixAlpha}/; 481chr(0x110021) =~ /\P{PosixAlpha}/; 482chr(0x110022) =~ /\p{XPosixAlpha}/; 483chr(0x110023) =~ /\P{XPosixAlpha}/; 484chr(0x110024) =~ /\p{Ascii}/; 485chr(0x110025) =~ /\P{Ascii}/; 486chr(0x110026) =~ /\p{PosixCntrl}/; 487chr(0x110027) =~ /\P{PosixCntrl}/; 488chr(0x110028) =~ /\p{XPosixCntrl}/; 489chr(0x110029) =~ /\P{XPosixCntrl}/; 490chr(0x11002A) =~ /\p{PosixGraph}/; 491chr(0x11002B) =~ /\P{PosixGraph}/; 492chr(0x11002C) =~ /\p{XPosixGraph}/; 493chr(0x11002D) =~ /\P{XPosixGraph}/; 494chr(0x11002E) =~ /\p{PosixLower}/; 495chr(0x11002F) =~ /\P{PosixLower}/; 496chr(0x110030) =~ /\p{XPosixLower}/; 497chr(0x110031) =~ /\P{XPosixLower}/; 498chr(0x110032) =~ /\p{PosixPrint}/; 499chr(0x110033) =~ /\P{PosixPrint}/; 500chr(0x110034) =~ /\p{XPosixPrint}/; 501chr(0x110035) =~ /\P{XPosixPrint}/; 502chr(0x110036) =~ /\p{PosixPunct}/; 503chr(0x110037) =~ /\P{PosixPunct}/; 504chr(0x110038) =~ /\p{XPosixPunct}/; 505chr(0x110039) =~ /\P{XPosixPunct}/; 506chr(0x11003A) =~ /\p{PosixUpper}/; 507chr(0x11003B) =~ /\P{PosixUpper}/; 508chr(0x11003C) =~ /\p{XPosixUpper}/; 509chr(0x11003D) =~ /\P{XPosixUpper}/; 510chr(0x11003E) =~ /\p{PosixXdigit}/; 511chr(0x11003F) =~ /\P{PosixXdigit}/; 512chr(0x110040) =~ /\p{XPosixXdigit}/; 513chr(0x110041) =~ /\P{XPosixXdigit}/; 514chr(0x110042) =~ /\p{PerlSpace}/; 515chr(0x110043) =~ /\P{PerlSpace}/; 516chr(0x110044) =~ /\p{XPerlSpace}/; 517chr(0x110045) =~ /\P{XPerlSpace}/; 518chr(0x110046) =~ /\p{PosixBlank}/; 519chr(0x110047) =~ /\P{PosixBlank}/; 520chr(0x110048) =~ 
/\p{XPosixBlank}/; 521chr(0x110049) =~ /\P{XPosixBlank}/; 522chr(0x110050) =~ /\w/; 523chr(0x110051) =~ /\W/; 524chr(0x110052) =~ /\d/; 525chr(0x110053) =~ /\D/; 526chr(0x110054) =~ /\s/; 527chr(0x110055) =~ /\S/; 528chr(0x110056) =~ /[[:word:]]/; 529chr(0x110057) =~ /[[:^word:]]/; 530chr(0x110058) =~ /[[:alnum:]]/; 531chr(0x110059) =~ /[[:^alnum:]]/; 532chr(0x11005A) =~ /[[:space:]]/; 533chr(0x11005B) =~ /[[:^space:]]/; 534chr(0x11005C) =~ /[[:digit:]]/; 535chr(0x11005D) =~ /[[:^digit:]]/; 536chr(0x11005E) =~ /[[:alpha:]]/; 537chr(0x11005F) =~ /[[:^alpha:]]/; 538chr(0x110060) =~ /[[:ascii:]]/; 539chr(0x110061) =~ /[[:^ascii:]]/; 540chr(0x110062) =~ /[[:cntrl:]]/; 541chr(0x110063) =~ /[[:^cntrl:]]/; 542chr(0x110064) =~ /[[:graph:]]/; 543chr(0x110065) =~ /[[:^graph:]]/; 544chr(0x110066) =~ /[[:lower:]]/; 545chr(0x110067) =~ /[[:^lower:]]/; 546chr(0x110068) =~ /[[:print:]]/; 547chr(0x110069) =~ /[[:^print:]]/; 548chr(0x11006A) =~ /[[:punct:]]/; 549chr(0x11006B) =~ /[[:^punct:]]/; 550chr(0x11006C) =~ /[[:upper:]]/; 551chr(0x11006D) =~ /[[:^upper:]]/; 552chr(0x11006E) =~ /[[:xdigit:]]/; 553chr(0x11006F) =~ /[[:^xdigit:]]/; 554chr(0x110070) =~ /[[:blank:]]/; 555chr(0x110071) =~ /[[:^blank:]]/; 556EXPECT 557Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 14. 558Code point 0x110010 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 15. 559Code point 0x110011 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 16. 560Code point 0x110011 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 16. 561Code point 0x110012 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 17. 562Code point 0x110013 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 18. 563Code point 0x110013 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 18. 
564Code point 0x110014 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 19. 565Code point 0x110015 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 20. 566Code point 0x110015 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 20. 567Code point 0x110016 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 21. 568Code point 0x110017 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 22. 569Code point 0x110017 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 22. 570Code point 0x110018 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 23. 571Code point 0x110019 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 24. 572Code point 0x110019 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 24. 573Code point 0x11001A is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 25. 574Code point 0x11001B is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 26. 575Code point 0x11001B is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 26. 576Code point 0x11001C is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 27. 577Code point 0x11001D is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 28. 578Code point 0x11001D is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 28. 579Code point 0x11001E is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 29. 580Code point 0x11001F is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 30. 581Code point 0x11001F is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 30. 582Code point 0x110020 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 31. 
583Code point 0x110021 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 32. 584Code point 0x110021 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 32. 585Code point 0x110022 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 33. 586Code point 0x110023 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 34. 587Code point 0x110023 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 34. 588Code point 0x110024 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 35. 589Code point 0x110025 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 36. 590Code point 0x110025 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 36. 591Code point 0x110026 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 37. 592Code point 0x110027 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 38. 593Code point 0x110027 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 38. 594Code point 0x110028 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 39. 595Code point 0x110029 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 40. 596Code point 0x110029 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 40. 597Code point 0x11002A is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 41. 598Code point 0x11002B is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 42. 599Code point 0x11002B is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 42. 600Code point 0x11002C is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 43. 601Code point 0x11002D is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 44. 
602Code point 0x11002D is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 44. 603Code point 0x11002E is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 45. 604Code point 0x11002F is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 46. 605Code point 0x11002F is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 46. 606Code point 0x110030 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 47. 607Code point 0x110031 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 48. 608Code point 0x110031 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 48. 609Code point 0x110032 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 49. 610Code point 0x110033 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 50. 611Code point 0x110033 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 50. 612Code point 0x110034 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 51. 613Code point 0x110035 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 52. 614Code point 0x110035 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 52. 615Code point 0x110036 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 53. 616Code point 0x110037 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 54. 617Code point 0x110037 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 54. 618Code point 0x110038 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 55. 619Code point 0x110039 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 56. 620Code point 0x110039 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 56. 
621Code point 0x11003A is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 57. 622Code point 0x11003B is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 58. 623Code point 0x11003B is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 58. 624Code point 0x11003C is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 59. 625Code point 0x11003D is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 60. 626Code point 0x11003D is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 60. 627Code point 0x11003E is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 61. 628Code point 0x11003F is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 62. 629Code point 0x11003F is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 62. 630Code point 0x110040 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 63. 631Code point 0x110041 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 64. 632Code point 0x110041 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 64. 633Code point 0x110042 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 65. 634Code point 0x110043 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 66. 635Code point 0x110043 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 66. 636Code point 0x110044 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 67. 637Code point 0x110045 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 68. 638Code point 0x110045 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 68. 639Code point 0x110046 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 69. 
640Code point 0x110047 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 70. 641Code point 0x110047 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 70. 642Code point 0x110048 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 71. 643Code point 0x110049 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 72. 644Code point 0x110049 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 72. 645######## 646use warnings 'utf8'; 647chr(0x110000) =~ /\p{Any}/; 648no warnings 'non_unicode'; 649chr(0x110000) =~ /\p{Any}/; 650EXPECT 651Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 2. 652######## 653# TODO optimized regnode should still give warnings 654use warnings 'utf8'; 655chr(0x110000) =~ /lb=cr/; 656no warnings 'non_unicode'; 657chr(0x110000) =~ /lb=cr/; 658EXPECT 659Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 2. 
########
# NAME no warning when an above-Unicode code point matches a user-defined \p{} property
require "../test.pl";
use warnings 'utf8';
sub Is_Super { return '!utf8::Any' }
# The extra char is to avoid an optimization that avoids the problem when the
# property is the only non-latin1 char in a class
print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n";
EXPECT
1
########
# NAME C<use warnings "utf8"> flags surrogates, non-characters and above-Unicode code points on :utf8 output
require "../test.pl";
use warnings 'utf8';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D7FF}", "\n";
print $fh "\x{D800}", "\n";
print $fh "\x{DFFF}", "\n";
print $fh "\x{E000}", "\n";
print $fh "\x{FDCF}", "\n";
print $fh "\x{FDD0}", "\n";
print $fh "\x{FDEF}", "\n";
print $fh "\x{FDF0}", "\n";
print $fh "\x{FEFF}", "\n";
print $fh "\x{FFFD}", "\n";
print $fh "\x{FFFE}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{10000}", "\n";
print $fh "\x{1FFFE}", "\n";
print $fh "\x{1FFFF}", "\n";
print $fh "\x{2FFFE}", "\n";
print $fh "\x{2FFFF}", "\n";
print $fh "\x{3FFFE}", "\n";
print $fh "\x{3FFFF}", "\n";
print $fh "\x{4FFFE}", "\n";
print $fh "\x{4FFFF}", "\n";
print $fh "\x{5FFFE}", "\n";
print $fh "\x{5FFFF}", "\n";
print $fh "\x{6FFFE}", "\n";
print $fh "\x{6FFFF}", "\n";
print $fh "\x{7FFFE}", "\n";
print $fh "\x{7FFFF}", "\n";
print $fh "\x{8FFFE}", "\n";
print $fh "\x{8FFFF}", "\n";
print $fh "\x{9FFFE}", "\n";
print $fh "\x{9FFFF}", "\n";
print $fh "\x{AFFFE}", "\n";
print $fh "\x{AFFFF}", "\n";
print $fh "\x{BFFFE}", "\n";
print $fh "\x{BFFFF}", "\n";
print $fh "\x{CFFFE}", "\n";
print $fh "\x{CFFFF}", "\n";
print $fh "\x{DFFFE}", "\n";
print $fh "\x{DFFFF}", "\n";
print $fh "\x{EFFFE}", "\n";
print $fh "\x{EFFFF}", "\n";
print $fh "\x{FFFFE}", "\n";
print $fh "\x{FFFFF}", "\n";
print $fh "\x{100000}", "\n";
print $fh "\x{10FFFE}", "\n";
print $fh "\x{10FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Unicode surrogate U+DFFF is illegal in UTF-8 at - line 7.
Unicode non-character U+FDD0 is illegal for open interchange at - line 10.
Unicode non-character U+FDEF is illegal for open interchange at - line 11.
Unicode non-character U+FFFE is illegal for open interchange at - line 15.
Unicode non-character U+FFFF is illegal for open interchange at - line 16.
Unicode non-character U+1FFFE is illegal for open interchange at - line 18.
Unicode non-character U+1FFFF is illegal for open interchange at - line 19.
Unicode non-character U+2FFFE is illegal for open interchange at - line 20.
Unicode non-character U+2FFFF is illegal for open interchange at - line 21.
Unicode non-character U+3FFFE is illegal for open interchange at - line 22.
Unicode non-character U+3FFFF is illegal for open interchange at - line 23.
Unicode non-character U+4FFFE is illegal for open interchange at - line 24.
Unicode non-character U+4FFFF is illegal for open interchange at - line 25.
Unicode non-character U+5FFFE is illegal for open interchange at - line 26.
Unicode non-character U+5FFFF is illegal for open interchange at - line 27.
Unicode non-character U+6FFFE is illegal for open interchange at - line 28.
Unicode non-character U+6FFFF is illegal for open interchange at - line 29.
Unicode non-character U+7FFFE is illegal for open interchange at - line 30.
Unicode non-character U+7FFFF is illegal for open interchange at - line 31.
Unicode non-character U+8FFFE is illegal for open interchange at - line 32.
Unicode non-character U+8FFFF is illegal for open interchange at - line 33.
Unicode non-character U+9FFFE is illegal for open interchange at - line 34.
Unicode non-character U+9FFFF is illegal for open interchange at - line 35.
Unicode non-character U+AFFFE is illegal for open interchange at - line 36.
Unicode non-character U+AFFFF is illegal for open interchange at - line 37.
Unicode non-character U+BFFFE is illegal for open interchange at - line 38.
Unicode non-character U+BFFFF is illegal for open interchange at - line 39.
Unicode non-character U+CFFFE is illegal for open interchange at - line 40.
Unicode non-character U+CFFFF is illegal for open interchange at - line 41.
Unicode non-character U+DFFFE is illegal for open interchange at - line 42.
Unicode non-character U+DFFFF is illegal for open interchange at - line 43.
Unicode non-character U+EFFFE is illegal for open interchange at - line 44.
Unicode non-character U+EFFFF is illegal for open interchange at - line 45.
Unicode non-character U+FFFFE is illegal for open interchange at - line 46.
Unicode non-character U+FFFFF is illegal for open interchange at - line 47.
Unicode non-character U+10FFFE is illegal for open interchange at - line 49.
Unicode non-character U+10FFFF is illegal for open interchange at - line 50.
Code point 0x110000 is not Unicode, may not be portable at - line 51.
########
# NAME C<use warnings "utf8"> emits surrogate, non-character and above-Unicode warnings together
require "../test.pl";
use warnings 'utf8';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
Unicode non-character U+FFFF is illegal for open interchange at - line 6.
Code point 0x110000 is not Unicode, may not be portable at - line 7.
########
# NAME C<no warnings "surrogate"> suppresses only the surrogate warning
require "../test.pl";
use warnings 'utf8';
no warnings 'surrogate';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode non-character U+FFFF is illegal for open interchange at - line 7.
Code point 0x110000 is not Unicode, may not be portable at - line 8.
########
# NAME C<no warnings "nonchar"> suppresses only the non-character warning
require "../test.pl";
use warnings 'utf8';
no warnings 'nonchar';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Code point 0x110000 is not Unicode, may not be portable at - line 8.
########
# NAME C<no warnings "non_unicode"> suppresses only the above-Unicode warning
require "../test.pl";
use warnings 'utf8';
no warnings 'non_unicode';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Unicode non-character U+FFFF is illegal for open interchange at - line 7.
########
# NAME C<use warnings "nonchar"> works in isolation
require "../test.pl";
use warnings 'nonchar';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{FFFF}", "\n";
close $fh;
EXPECT
Unicode non-character U+FFFF is illegal for open interchange at - line 5.
########
# NAME C<use warnings "surrogate"> works in isolation
require "../test.pl";
use warnings 'surrogate';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
########
# NAME C<use warnings "non_unicode"> works in isolation
require "../test.pl";
use warnings 'non_unicode';
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{110000}", "\n";
close $fh;
EXPECT
Code point 0x110000 is not Unicode, may not be portable at - line 5.
844######## 845require "../test.pl"; 846no warnings 'utf8'; 847my $file = tempfile(); 848open(my $fh, "+>:utf8", $file); 849print $fh "\x{D7FF}", "\n"; 850print $fh "\x{D800}", "\n"; 851print $fh "\x{DFFF}", "\n"; 852print $fh "\x{E000}", "\n"; 853print $fh "\x{FDCF}", "\n"; 854print $fh "\x{FDD0}", "\n"; 855print $fh "\x{FDEF}", "\n"; 856print $fh "\x{FDF0}", "\n"; 857print $fh "\x{FEFF}", "\n"; 858print $fh "\x{FFFD}", "\n"; 859print $fh "\x{FFFE}", "\n"; 860print $fh "\x{FFFF}", "\n"; 861print $fh "\x{10000}", "\n"; 862print $fh "\x{1FFFE}", "\n"; 863print $fh "\x{1FFFF}", "\n"; 864print $fh "\x{2FFFE}", "\n"; 865print $fh "\x{2FFFF}", "\n"; 866print $fh "\x{3FFFE}", "\n"; 867print $fh "\x{3FFFF}", "\n"; 868print $fh "\x{4FFFE}", "\n"; 869print $fh "\x{4FFFF}", "\n"; 870print $fh "\x{5FFFE}", "\n"; 871print $fh "\x{5FFFF}", "\n"; 872print $fh "\x{6FFFE}", "\n"; 873print $fh "\x{6FFFF}", "\n"; 874print $fh "\x{7FFFE}", "\n"; 875print $fh "\x{7FFFF}", "\n"; 876print $fh "\x{8FFFE}", "\n"; 877print $fh "\x{8FFFF}", "\n"; 878print $fh "\x{9FFFE}", "\n"; 879print $fh "\x{9FFFF}", "\n"; 880print $fh "\x{AFFFE}", "\n"; 881print $fh "\x{AFFFF}", "\n"; 882print $fh "\x{BFFFE}", "\n"; 883print $fh "\x{BFFFF}", "\n"; 884print $fh "\x{CFFFE}", "\n"; 885print $fh "\x{CFFFF}", "\n"; 886print $fh "\x{DFFFE}", "\n"; 887print $fh "\x{DFFFF}", "\n"; 888print $fh "\x{EFFFE}", "\n"; 889print $fh "\x{EFFFF}", "\n"; 890print $fh "\x{FFFFE}", "\n"; 891print $fh "\x{FFFFF}", "\n"; 892print $fh "\x{100000}", "\n"; 893print $fh "\x{10FFFE}", "\n"; 894print $fh "\x{10FFFF}", "\n"; 895print $fh "\x{110000}", "\n"; 896close $fh; 897EXPECT 898