1 //===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Support/ConvertUTF.h" 10 #include "llvm/ADT/ArrayRef.h" 11 #include "gtest/gtest.h" 12 #include <string> 13 #include <vector> 14 15 using namespace llvm; 16 17 TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) { 18 // Src is the look of disapproval. 19 alignas(UTF16) static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c"; 20 ArrayRef<char> Ref(Src, sizeof(Src) - 1); 21 std::string Result; 22 bool Success = convertUTF16ToUTF8String(Ref, Result); 23 EXPECT_TRUE(Success); 24 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0"); 25 EXPECT_EQ(Expected, Result); 26 } 27 28 TEST(ConvertUTFTest, ConvertUTF32LittleEndianToUTF8String) { 29 // Src is the look of disapproval. 30 alignas(UTF32) static const char Src[] = 31 "\xFF\xFE\x00\x00\xA0\x0C\x00\x00\x5F\x00\x00\x00\xA0\x0C\x00\x00"; 32 ArrayRef<char> Ref(Src, sizeof(Src) - 1); 33 std::string Result; 34 bool Success = convertUTF32ToUTF8String(Ref, Result); 35 EXPECT_TRUE(Success); 36 std::string Expected("\xE0\xB2\xA0_\xE0\xB2\xA0"); 37 EXPECT_EQ(Expected, Result); 38 } 39 40 TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) { 41 // Src is the look of disapproval. 42 alignas(UTF16) static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0"; 43 ArrayRef<char> Ref(Src, sizeof(Src) - 1); 44 std::string Result; 45 bool Success = convertUTF16ToUTF8String(Ref, Result); 46 EXPECT_TRUE(Success); 47 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0"); 48 EXPECT_EQ(Expected, Result); 49 } 50 51 TEST(ConvertUTFTest, ConvertUTF32BigEndianToUTF8String) { 52 // Src is the look of disapproval. 
53 alignas(UTF32) static const char Src[] = 54 "\x00\x00\xFE\xFF\x00\x00\x0C\xA0\x00\x00\x00\x5F\x00\x00\x0C\xA0"; 55 ArrayRef<char> Ref(Src, sizeof(Src) - 1); 56 std::string Result; 57 bool Success = convertUTF32ToUTF8String(Ref, Result); 58 EXPECT_TRUE(Success); 59 std::string Expected("\xE0\xB2\xA0_\xE0\xB2\xA0"); 60 EXPECT_EQ(Expected, Result); 61 } 62 63 TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) { 64 // Src is the look of disapproval. 65 static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0"; 66 StringRef Ref(Src, sizeof(Src) - 1); 67 SmallVector<UTF16, 5> Result; 68 bool Success = convertUTF8ToUTF16String(Ref, Result); 69 EXPECT_TRUE(Success); 70 static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0}; 71 ASSERT_EQ(3u, Result.size()); 72 for (int I = 0, E = 3; I != E; ++I) 73 EXPECT_EQ(Expected[I], Result[I]); 74 } 75 76 TEST(ConvertUTFTest, OddLengthInput) { 77 std::string Result; 78 bool Success = convertUTF16ToUTF8String(ArrayRef("xxxxx", 5), Result); 79 EXPECT_FALSE(Success); 80 } 81 82 TEST(ConvertUTFTest, Empty) { 83 std::string Result; 84 bool Success = convertUTF16ToUTF8String(llvm::ArrayRef<char>(), Result); 85 EXPECT_TRUE(Success); 86 EXPECT_TRUE(Result.empty()); 87 } 88 89 TEST(ConvertUTFTest, HasUTF16BOM) { 90 bool HasBOM = hasUTF16ByteOrderMark(ArrayRef("\xff\xfe", 2)); 91 EXPECT_TRUE(HasBOM); 92 HasBOM = hasUTF16ByteOrderMark(ArrayRef("\xfe\xff", 2)); 93 EXPECT_TRUE(HasBOM); 94 HasBOM = hasUTF16ByteOrderMark(ArrayRef("\xfe\xff ", 3)); 95 EXPECT_TRUE(HasBOM); // Don't care about odd lengths. 96 HasBOM = hasUTF16ByteOrderMark(ArrayRef("\xfe\xff\x00asdf", 6)); 97 EXPECT_TRUE(HasBOM); 98 99 HasBOM = hasUTF16ByteOrderMark({}); 100 EXPECT_FALSE(HasBOM); 101 HasBOM = hasUTF16ByteOrderMark(ArrayRef("\xfe", 1)); 102 EXPECT_FALSE(HasBOM); 103 } 104 105 TEST(ConvertUTFTest, UTF16WrappersForConvertUTF16ToUTF8String) { 106 // Src is the look of disapproval. 
107 alignas(UTF16) static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c"; 108 ArrayRef<UTF16> SrcRef = ArrayRef((const UTF16 *)Src, 4); 109 std::string Result; 110 bool Success = convertUTF16ToUTF8String(SrcRef, Result); 111 EXPECT_TRUE(Success); 112 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0"); 113 EXPECT_EQ(Expected, Result); 114 } 115 116 TEST(ConvertUTFTest, ConvertUTF8toWide) { 117 // Src is the look of disapproval. 118 static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0"; 119 std::wstring Result; 120 bool Success = ConvertUTF8toWide((const char*)Src, Result); 121 EXPECT_TRUE(Success); 122 std::wstring Expected(L"\x0ca0_\x0ca0"); 123 EXPECT_EQ(Expected, Result); 124 Result.clear(); 125 Success = ConvertUTF8toWide(StringRef(Src, 7), Result); 126 EXPECT_TRUE(Success); 127 EXPECT_EQ(Expected, Result); 128 } 129 130 TEST(ConvertUTFTest, convertWideToUTF8) { 131 // Src is the look of disapproval. 132 static const wchar_t Src[] = L"\x0ca0_\x0ca0"; 133 std::string Result; 134 bool Success = convertWideToUTF8(Src, Result); 135 EXPECT_TRUE(Success); 136 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0"); 137 EXPECT_EQ(Expected, Result); 138 } 139 140 struct ConvertUTFResultContainer { 141 ConversionResult ErrorCode; 142 std::vector<unsigned> UnicodeScalars; 143 144 ConvertUTFResultContainer(ConversionResult ErrorCode) 145 : ErrorCode(ErrorCode) {} 146 147 ConvertUTFResultContainer 148 withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000, 149 unsigned US2 = 0x110000, unsigned US3 = 0x110000, 150 unsigned US4 = 0x110000, unsigned US5 = 0x110000, 151 unsigned US6 = 0x110000, unsigned US7 = 0x110000) { 152 ConvertUTFResultContainer Result(*this); 153 if (US0 != 0x110000) 154 Result.UnicodeScalars.push_back(US0); 155 if (US1 != 0x110000) 156 Result.UnicodeScalars.push_back(US1); 157 if (US2 != 0x110000) 158 Result.UnicodeScalars.push_back(US2); 159 if (US3 != 0x110000) 160 Result.UnicodeScalars.push_back(US3); 161 if (US4 != 0x110000) 162 
Result.UnicodeScalars.push_back(US4); 163 if (US5 != 0x110000) 164 Result.UnicodeScalars.push_back(US5); 165 if (US6 != 0x110000) 166 Result.UnicodeScalars.push_back(US6); 167 if (US7 != 0x110000) 168 Result.UnicodeScalars.push_back(US7); 169 return Result; 170 } 171 }; 172 173 std::pair<ConversionResult, std::vector<unsigned>> 174 ConvertUTF8ToUnicodeScalarsLenient(StringRef S) { 175 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data()); 176 177 const UTF8 *SourceNext = SourceStart; 178 std::vector<UTF32> Decoded(S.size(), 0); 179 UTF32 *TargetStart = Decoded.data(); 180 181 auto ErrorCode = 182 ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart, 183 Decoded.data() + Decoded.size(), lenientConversion); 184 185 Decoded.resize(TargetStart - Decoded.data()); 186 187 return std::make_pair(ErrorCode, Decoded); 188 } 189 190 std::pair<ConversionResult, std::vector<unsigned>> 191 ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) { 192 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data()); 193 194 const UTF8 *SourceNext = SourceStart; 195 std::vector<UTF32> Decoded(S.size(), 0); 196 UTF32 *TargetStart = Decoded.data(); 197 198 auto ErrorCode = ConvertUTF8toUTF32Partial( 199 &SourceNext, SourceStart + S.size(), &TargetStart, 200 Decoded.data() + Decoded.size(), lenientConversion); 201 202 Decoded.resize(TargetStart - Decoded.data()); 203 204 return std::make_pair(ErrorCode, Decoded); 205 } 206 207 ::testing::AssertionResult 208 CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected, 209 StringRef S, bool Partial = false) { 210 ConversionResult ErrorCode; 211 std::vector<unsigned> Decoded; 212 if (!Partial) 213 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S); 214 else 215 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S); 216 217 if (Expected.ErrorCode != ErrorCode) 218 return ::testing::AssertionFailure() << "Expected error code " 219 << Expected.ErrorCode << ", 
actual " 220 << ErrorCode; 221 222 if (Expected.UnicodeScalars != Decoded) 223 return ::testing::AssertionFailure() 224 << "Expected lenient decoded result:\n" 225 << ::testing::PrintToString(Expected.UnicodeScalars) << "\n" 226 << "Actual result:\n" << ::testing::PrintToString(Decoded); 227 228 return ::testing::AssertionSuccess(); 229 } 230 231 TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { 232 233 // 234 // 1-byte sequences 235 // 236 237 // U+0041 LATIN CAPITAL LETTER A 238 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 239 ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41")); 240 241 // 242 // 2-byte sequences 243 // 244 245 // U+0283 LATIN SMALL LETTER ESH 246 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 247 ConvertUTFResultContainer(conversionOK).withScalars(0x0283), 248 "\xca\x83")); 249 250 // U+03BA GREEK SMALL LETTER KAPPA 251 // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA 252 // U+03C3 GREEK SMALL LETTER SIGMA 253 // U+03BC GREEK SMALL LETTER MU 254 // U+03B5 GREEK SMALL LETTER EPSILON 255 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 256 ConvertUTFResultContainer(conversionOK) 257 .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5), 258 "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5")); 259 260 // 261 // 3-byte sequences 262 // 263 264 // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B 265 // U+6587 CJK UNIFIED IDEOGRAPH-6587 266 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 267 ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587), 268 "\xe4\xbe\x8b\xe6\x96\x87")); 269 270 // U+D55C HANGUL SYLLABLE HAN 271 // U+AE00 HANGUL SYLLABLE GEUL 272 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 273 ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00), 274 "\xed\x95\x9c\xea\xb8\x80")); 275 276 // U+1112 HANGUL CHOSEONG HIEUH 277 // U+1161 HANGUL JUNGSEONG A 278 // U+11AB HANGUL JONGSEONG NIEUN 279 // U+1100 HANGUL CHOSEONG KIYEOK 280 // U+1173 HANGUL JUNGSEONG EU 281 // U+11AF HANGUL JONGSEONG RIEUL 282 
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 283 ConvertUTFResultContainer(conversionOK) 284 .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af), 285 "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3" 286 "\xe1\x86\xaf")); 287 288 // 289 // 4-byte sequences 290 // 291 292 // U+E0100 VARIATION SELECTOR-17 293 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 294 ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100), 295 "\xf3\xa0\x84\x80")); 296 297 // 298 // First possible sequence of a certain length 299 // 300 301 // U+0000 NULL 302 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 303 ConvertUTFResultContainer(conversionOK).withScalars(0x0000), 304 StringRef("\x00", 1))); 305 306 // U+0080 PADDING CHARACTER 307 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 308 ConvertUTFResultContainer(conversionOK).withScalars(0x0080), 309 "\xc2\x80")); 310 311 // U+0800 SAMARITAN LETTER ALAF 312 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 313 ConvertUTFResultContainer(conversionOK).withScalars(0x0800), 314 "\xe0\xa0\x80")); 315 316 // U+10000 LINEAR B SYLLABLE B008 A 317 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 318 ConvertUTFResultContainer(conversionOK).withScalars(0x10000), 319 "\xf0\x90\x80\x80")); 320 321 // U+200000 (invalid) 322 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 323 ConvertUTFResultContainer(sourceIllegal) 324 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 325 "\xf8\x88\x80\x80\x80")); 326 327 // U+4000000 (invalid) 328 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 329 ConvertUTFResultContainer(sourceIllegal) 330 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 331 "\xfc\x84\x80\x80\x80\x80")); 332 333 // 334 // Last possible sequence of a certain length 335 // 336 337 // U+007F DELETE 338 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 339 ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f")); 340 341 // U+07FF (unassigned) 342 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 343 
ConvertUTFResultContainer(conversionOK).withScalars(0x07ff), 344 "\xdf\xbf")); 345 346 // U+FFFF (noncharacter) 347 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 348 ConvertUTFResultContainer(conversionOK).withScalars(0xffff), 349 "\xef\xbf\xbf")); 350 351 // U+1FFFFF (invalid) 352 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 353 ConvertUTFResultContainer(sourceIllegal) 354 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 355 "\xf7\xbf\xbf\xbf")); 356 357 // U+3FFFFFF (invalid) 358 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 359 ConvertUTFResultContainer(sourceIllegal) 360 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 361 "\xfb\xbf\xbf\xbf\xbf")); 362 363 // U+7FFFFFFF (invalid) 364 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 365 ConvertUTFResultContainer(sourceIllegal) 366 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 367 "\xfd\xbf\xbf\xbf\xbf\xbf")); 368 369 // 370 // Other boundary conditions 371 // 372 373 // U+D7FF (unassigned) 374 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 375 ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff), 376 "\xed\x9f\xbf")); 377 378 // U+E000 (private use) 379 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 380 ConvertUTFResultContainer(conversionOK).withScalars(0xe000), 381 "\xee\x80\x80")); 382 383 // U+FFFD REPLACEMENT CHARACTER 384 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 385 ConvertUTFResultContainer(conversionOK).withScalars(0xfffd), 386 "\xef\xbf\xbd")); 387 388 // U+10FFFF (noncharacter) 389 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 390 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff), 391 "\xf4\x8f\xbf\xbf")); 392 393 // U+110000 (invalid) 394 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 395 ConvertUTFResultContainer(sourceIllegal) 396 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 397 "\xf4\x90\x80\x80")); 398 399 // 400 // Unexpected continuation bytes 401 // 402 403 // A sequence of unexpected continuation bytes that don't follow a first 404 // byte, every byte is a 
maximal subpart. 405 406 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 407 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80")); 408 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 409 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf")); 410 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 411 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 412 "\x80\x80")); 413 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 414 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 415 "\x80\xbf")); 416 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 417 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 418 "\xbf\x80")); 419 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 420 ConvertUTFResultContainer(sourceIllegal) 421 .withScalars(0xfffd, 0xfffd, 0xfffd), 422 "\x80\xbf\x80")); 423 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 424 ConvertUTFResultContainer(sourceIllegal) 425 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 426 "\x80\xbf\x80\xbf")); 427 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 428 ConvertUTFResultContainer(sourceIllegal) 429 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 430 "\x80\xbf\x82\xbf\xaa")); 431 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 432 ConvertUTFResultContainer(sourceIllegal) 433 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 434 "\xaa\xb0\xbb\xbf\xaa\xa0")); 435 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 436 ConvertUTFResultContainer(sourceIllegal) 437 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 438 "\xaa\xb0\xbb\xbf\xaa\xa0\x8f")); 439 440 // All continuation bytes (0x80--0xbf). 
441 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 442 ConvertUTFResultContainer(sourceIllegal) 443 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 444 0xfffd, 0xfffd, 0xfffd, 0xfffd) 445 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 446 0xfffd, 0xfffd, 0xfffd, 0xfffd) 447 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 448 0xfffd, 0xfffd, 0xfffd, 0xfffd) 449 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 450 0xfffd, 0xfffd, 0xfffd, 0xfffd) 451 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 452 0xfffd, 0xfffd, 0xfffd, 0xfffd) 453 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 454 0xfffd, 0xfffd, 0xfffd, 0xfffd) 455 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 456 0xfffd, 0xfffd, 0xfffd, 0xfffd) 457 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 458 0xfffd, 0xfffd, 0xfffd, 0xfffd), 459 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 460 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 461 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 462 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf")); 463 464 // 465 // Lonely start bytes 466 // 467 468 // Start bytes of 2-byte sequences (0xc0--0xdf). 
469 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 470 ConvertUTFResultContainer(sourceIllegal) 471 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 472 0xfffd, 0xfffd, 0xfffd, 0xfffd) 473 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 474 0xfffd, 0xfffd, 0xfffd, 0xfffd) 475 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 476 0xfffd, 0xfffd, 0xfffd, 0xfffd) 477 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 478 0xfffd, 0xfffd, 0xfffd, 0xfffd), 479 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 480 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf")); 481 482 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 483 ConvertUTFResultContainer(sourceIllegal) 484 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 485 0xfffd, 0x0020, 0xfffd, 0x0020) 486 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 487 0xfffd, 0x0020, 0xfffd, 0x0020) 488 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 489 0xfffd, 0x0020, 0xfffd, 0x0020) 490 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 491 0xfffd, 0x0020, 0xfffd, 0x0020) 492 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 493 0xfffd, 0x0020, 0xfffd, 0x0020) 494 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 495 0xfffd, 0x0020, 0xfffd, 0x0020) 496 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 497 0xfffd, 0x0020, 0xfffd, 0x0020) 498 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 499 0xfffd, 0x0020, 0xfffd, 0x0020), 500 "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20" 501 "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20" 502 "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20" 503 "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20")); 504 505 // Start bytes of 3-byte sequences (0xe0--0xef). 
506 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 507 ConvertUTFResultContainer(sourceIllegal) 508 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 509 0xfffd, 0xfffd, 0xfffd, 0xfffd) 510 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 511 0xfffd, 0xfffd, 0xfffd, 0xfffd), 512 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef")); 513 514 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 515 ConvertUTFResultContainer(sourceIllegal) 516 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 517 0xfffd, 0x0020, 0xfffd, 0x0020) 518 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 519 0xfffd, 0x0020, 0xfffd, 0x0020) 520 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 521 0xfffd, 0x0020, 0xfffd, 0x0020) 522 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 523 0xfffd, 0x0020, 0xfffd, 0x0020), 524 "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20" 525 "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20")); 526 527 // Start bytes of 4-byte sequences (0xf0--0xf7). 528 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 529 ConvertUTFResultContainer(sourceIllegal) 530 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 531 0xfffd, 0xfffd, 0xfffd, 0xfffd), 532 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7")); 533 534 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 535 ConvertUTFResultContainer(sourceIllegal) 536 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 537 0xfffd, 0x0020, 0xfffd, 0x0020) 538 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 539 0xfffd, 0x0020, 0xfffd, 0x0020), 540 "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20")); 541 542 // Start bytes of 5-byte sequences (0xf8--0xfb). 
543 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 544 ConvertUTFResultContainer(sourceIllegal) 545 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 546 "\xf8\xf9\xfa\xfb")); 547 548 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 549 ConvertUTFResultContainer(sourceIllegal) 550 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 551 0xfffd, 0x0020, 0xfffd, 0x0020), 552 "\xf8\x20\xf9\x20\xfa\x20\xfb\x20")); 553 554 // Start bytes of 6-byte sequences (0xfc--0xfd). 555 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 556 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 557 "\xfc\xfd")); 558 559 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 560 ConvertUTFResultContainer(sourceIllegal) 561 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020), 562 "\xfc\x20\xfd\x20")); 563 564 // 565 // Other bytes (0xc0--0xc1, 0xfe--0xff). 566 // 567 568 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 569 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0")); 570 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 571 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1")); 572 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 573 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe")); 574 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 575 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff")); 576 577 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 578 ConvertUTFResultContainer(sourceIllegal) 579 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 580 "\xc0\xc1\xfe\xff")); 581 582 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 583 ConvertUTFResultContainer(sourceIllegal) 584 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 585 "\xfe\xfe\xff\xff")); 586 587 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 588 ConvertUTFResultContainer(sourceIllegal) 589 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 590 "\xfe\x80\x80\x80\x80\x80")); 591 592 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 593 ConvertUTFResultContainer(sourceIllegal) 594 
.withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 595 "\xff\x80\x80\x80\x80\x80")); 596 597 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 598 ConvertUTFResultContainer(sourceIllegal) 599 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, 600 0xfffd, 0x0020, 0xfffd, 0x0020), 601 "\xc0\x20\xc1\x20\xfe\x20\xff\x20")); 602 603 // 604 // Sequences with one continuation byte missing 605 // 606 607 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 608 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2")); 609 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 610 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf")); 611 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 612 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 613 "\xe0\xa0")); 614 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 615 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 616 "\xe0\xbf")); 617 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 618 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 619 "\xe1\x80")); 620 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 621 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 622 "\xec\xbf")); 623 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 624 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 625 "\xed\x80")); 626 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 627 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 628 "\xed\x9f")); 629 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 630 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 631 "\xee\x80")); 632 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 633 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 634 "\xef\xbf")); 635 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 636 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 637 "\xf0\x90\x80")); 638 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 639 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 640 "\xf0\xbf\xbf")); 
641 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 642 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 643 "\xf1\x80\x80")); 644 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 645 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 646 "\xf3\xbf\xbf")); 647 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 648 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 649 "\xf4\x80\x80")); 650 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 651 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 652 "\xf4\x8f\xbf")); 653 654 // Overlong sequences with one trailing byte missing. 655 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 656 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 657 "\xc0")); 658 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 659 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 660 "\xc1")); 661 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 662 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 663 "\xe0\x80")); 664 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 665 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 666 "\xe0\x9f")); 667 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 668 ConvertUTFResultContainer(sourceIllegal) 669 .withScalars(0xfffd, 0xfffd, 0xfffd), 670 "\xf0\x80\x80")); 671 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 672 ConvertUTFResultContainer(sourceIllegal) 673 .withScalars(0xfffd, 0xfffd, 0xfffd), 674 "\xf0\x8f\x80")); 675 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 676 ConvertUTFResultContainer(sourceIllegal) 677 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 678 "\xf8\x80\x80\x80")); 679 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 680 ConvertUTFResultContainer(sourceIllegal) 681 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 682 "\xfc\x80\x80\x80\x80")); 683 684 // Sequences that represent surrogates with one trailing byte missing. 
685 // High surrogates 686 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 687 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 688 "\xed\xa0")); 689 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 690 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 691 "\xed\xac")); 692 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 693 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 694 "\xed\xaf")); 695 // Low surrogates 696 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 697 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 698 "\xed\xb0")); 699 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 700 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 701 "\xed\xb4")); 702 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 703 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 704 "\xed\xbf")); 705 706 // Ill-formed 4-byte sequences. 707 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx 708 // U+1100xx (invalid) 709 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 710 ConvertUTFResultContainer(sourceIllegal) 711 .withScalars(0xfffd, 0xfffd, 0xfffd), 712 "\xf4\x90\x80")); 713 // U+13FBxx (invalid) 714 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 715 ConvertUTFResultContainer(sourceIllegal) 716 .withScalars(0xfffd, 0xfffd, 0xfffd), 717 "\xf4\xbf\xbf")); 718 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 719 ConvertUTFResultContainer(sourceIllegal) 720 .withScalars(0xfffd, 0xfffd, 0xfffd), 721 "\xf5\x80\x80")); 722 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 723 ConvertUTFResultContainer(sourceIllegal) 724 .withScalars(0xfffd, 0xfffd, 0xfffd), 725 "\xf6\x80\x80")); 726 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 727 ConvertUTFResultContainer(sourceIllegal) 728 .withScalars(0xfffd, 0xfffd, 0xfffd), 729 "\xf7\x80\x80")); 730 // U+1FFBxx (invalid) 731 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 732 ConvertUTFResultContainer(sourceIllegal) 733 .withScalars(0xfffd, 0xfffd, 0xfffd), 734 
"\xf7\xbf\xbf")); 735 736 // Ill-formed 5-byte sequences. 737 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx 738 // U+2000xx (invalid) 739 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 740 ConvertUTFResultContainer(sourceIllegal) 741 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 742 "\xf8\x88\x80\x80")); 743 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 744 ConvertUTFResultContainer(sourceIllegal) 745 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 746 "\xf8\xbf\xbf\xbf")); 747 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 748 ConvertUTFResultContainer(sourceIllegal) 749 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 750 "\xf9\x80\x80\x80")); 751 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 752 ConvertUTFResultContainer(sourceIllegal) 753 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 754 "\xfa\x80\x80\x80")); 755 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 756 ConvertUTFResultContainer(sourceIllegal) 757 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 758 "\xfb\x80\x80\x80")); 759 // U+3FFFFxx (invalid) 760 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 761 ConvertUTFResultContainer(sourceIllegal) 762 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 763 "\xfb\xbf\xbf\xbf")); 764 765 // Ill-formed 6-byte sequences. 
766 // 1111110u 10uuuuuu 10uzzzzz 10zzzyyyy 10yyyyxx 10xxxxxx 767 // U+40000xx (invalid) 768 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 769 ConvertUTFResultContainer(sourceIllegal) 770 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 771 "\xfc\x84\x80\x80\x80")); 772 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 773 ConvertUTFResultContainer(sourceIllegal) 774 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 775 "\xfc\xbf\xbf\xbf\xbf")); 776 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 777 ConvertUTFResultContainer(sourceIllegal) 778 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 779 "\xfd\x80\x80\x80\x80")); 780 // U+7FFFFFxx (invalid) 781 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 782 ConvertUTFResultContainer(sourceIllegal) 783 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 784 "\xfd\xbf\xbf\xbf\xbf")); 785 786 // 787 // Sequences with two continuation bytes missing 788 // 789 790 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 791 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 792 "\xf0\x90")); 793 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 794 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 795 "\xf0\xbf")); 796 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 797 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 798 "\xf1\x80")); 799 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 800 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 801 "\xf3\xbf")); 802 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 803 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 804 "\xf4\x80")); 805 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 806 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), 807 "\xf4\x8f")); 808 809 // Overlong sequences with two trailing byte missing. 
810 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 811 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0")); 812 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 813 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 814 "\xf0\x80")); 815 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 816 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 817 "\xf0\x8f")); 818 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 819 ConvertUTFResultContainer(sourceIllegal) 820 .withScalars(0xfffd, 0xfffd, 0xfffd), 821 "\xf8\x80\x80")); 822 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 823 ConvertUTFResultContainer(sourceIllegal) 824 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 825 "\xfc\x80\x80\x80")); 826 827 // Sequences that represent surrogates with two trailing bytes missing. 828 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 829 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed")); 830 831 // Ill-formed 4-byte sequences. 832 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx 833 // U+110yxx (invalid) 834 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 835 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 836 "\xf4\x90")); 837 // U+13Fyxx (invalid) 838 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 839 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 840 "\xf4\xbf")); 841 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 842 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 843 "\xf5\x80")); 844 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 845 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 846 "\xf6\x80")); 847 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 848 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 849 "\xf7\x80")); 850 // U+1FFyxx (invalid) 851 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 852 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 853 "\xf7\xbf")); 854 855 // Ill-formed 5-byte sequences. 
856 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx 857 // U+200yxx (invalid) 858 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 859 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), 860 "\xf8\x88\x80")); 861 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 862 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), 863 "\xf8\xbf\xbf")); 864 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 865 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), 866 "\xf9\x80\x80")); 867 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 868 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), 869 "\xfa\x80\x80")); 870 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 871 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), 872 "\xfb\x80\x80")); 873 // U+3FFFyxx (invalid) 874 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 875 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), 876 "\xfb\xbf\xbf")); 877 878 // Ill-formed 6-byte sequences. 
879 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx 880 // U+4000yxx (invalid) 881 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 882 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 883 "\xfc\x84\x80\x80")); 884 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 885 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 886 "\xfc\xbf\xbf\xbf")); 887 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 888 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 889 "\xfd\x80\x80\x80")); 890 // U+7FFFFyxx (invalid) 891 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 892 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 893 "\xfd\xbf\xbf\xbf")); 894 895 // 896 // Sequences with three continuation bytes missing 897 // 898 899 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 900 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0")); 901 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 902 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1")); 903 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 904 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2")); 905 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 906 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3")); 907 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 908 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4")); 909 910 // Broken overlong sequences. 911 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 912 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0")); 913 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 914 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 915 "\xf8\x80")); 916 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 917 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), 918 "\xfc\x80\x80")); 919 920 // Ill-formed 4-byte sequences. 
921 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx 922 // U+14yyxx (invalid) 923 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 924 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5")); 925 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 926 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6")); 927 // U+1Cyyxx (invalid) 928 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 929 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7")); 930 931 // Ill-formed 5-byte sequences. 932 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx 933 // U+20yyxx (invalid) 934 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 935 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 936 "\xf8\x88")); 937 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 938 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 939 "\xf8\xbf")); 940 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 941 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 942 "\xf9\x80")); 943 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 944 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 945 "\xfa\x80")); 946 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 947 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 948 "\xfb\x80")); 949 // U+3FCyyxx (invalid) 950 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 951 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 952 "\xfb\xbf")); 953 954 // Ill-formed 6-byte sequences. 
955 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx 956 // U+400yyxx (invalid) 957 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 958 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), 959 "\xfc\x84\x80")); 960 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 961 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), 962 "\xfc\xbf\xbf")); 963 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 964 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), 965 "\xfd\x80\x80")); 966 // U+7FFCyyxx (invalid) 967 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 968 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), 969 "\xfd\xbf\xbf")); 970 971 // 972 // Sequences with four continuation bytes missing 973 // 974 975 // Ill-formed 5-byte sequences. 976 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx 977 // U+uzyyxx (invalid) 978 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 979 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8")); 980 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 981 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9")); 982 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 983 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa")); 984 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 985 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb")); 986 // U+3zyyxx (invalid) 987 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 988 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb")); 989 990 // Broken overlong sequences. 991 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 992 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8")); 993 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 994 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 995 "\xfc\x80")); 996 997 // Ill-formed 6-byte sequences. 
998 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx 999 // U+uzzyyxx (invalid) 1000 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1001 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 1002 "\xfc\x84")); 1003 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1004 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 1005 "\xfc\xbf")); 1006 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1007 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 1008 "\xfd\x80")); 1009 // U+7Fzzyyxx (invalid) 1010 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1011 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 1012 "\xfd\xbf")); 1013 1014 // 1015 // Sequences with five continuation bytes missing 1016 // 1017 1018 // Ill-formed 6-byte sequences. 1019 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx 1020 // U+uzzyyxx (invalid) 1021 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1022 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc")); 1023 // U+uuzzyyxx (invalid) 1024 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1025 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd")); 1026 1027 // 1028 // Consecutive sequences with trailing bytes missing 1029 // 1030 1031 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1032 ConvertUTFResultContainer(sourceIllegal) 1033 .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd) 1034 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd) 1035 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd) 1036 .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd) 1037 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd) 1038 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1039 "\xc0" "\xe0\x80" "\xf0\x80\x80" 1040 "\xf8\x80\x80\x80" 1041 "\xfc\x80\x80\x80\x80" 1042 "\xdf" "\xef\xbf" "\xf7\xbf\xbf" 1043 "\xfb\xbf\xbf\xbf" 1044 "\xfd\xbf\xbf\xbf\xbf")); 1045 1046 // 1047 // Overlong UTF-8 sequences 1048 // 1049 1050 // U+002F SOLIDUS 
1051 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1052 ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f")); 1053 1054 // Overlong sequences of the above. 1055 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1056 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 1057 "\xc0\xaf")); 1058 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1059 ConvertUTFResultContainer(sourceIllegal) 1060 .withScalars(0xfffd, 0xfffd, 0xfffd), 1061 "\xe0\x80\xaf")); 1062 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1063 ConvertUTFResultContainer(sourceIllegal) 1064 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 1065 "\xf0\x80\x80\xaf")); 1066 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1067 ConvertUTFResultContainer(sourceIllegal) 1068 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1069 "\xf8\x80\x80\x80\xaf")); 1070 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1071 ConvertUTFResultContainer(sourceIllegal) 1072 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1073 "\xfc\x80\x80\x80\x80\xaf")); 1074 1075 // U+0000 NULL 1076 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1077 ConvertUTFResultContainer(conversionOK).withScalars(0x0000), 1078 StringRef("\x00", 1))); 1079 1080 // Overlong sequences of the above. 
1081 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1082 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 1083 "\xc0\x80")); 1084 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1085 ConvertUTFResultContainer(sourceIllegal) 1086 .withScalars(0xfffd, 0xfffd, 0xfffd), 1087 "\xe0\x80\x80")); 1088 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1089 ConvertUTFResultContainer(sourceIllegal) 1090 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 1091 "\xf0\x80\x80\x80")); 1092 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1093 ConvertUTFResultContainer(sourceIllegal) 1094 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1095 "\xf8\x80\x80\x80\x80")); 1096 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1097 ConvertUTFResultContainer(sourceIllegal) 1098 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1099 "\xfc\x80\x80\x80\x80\x80")); 1100 1101 // Other overlong sequences. 1102 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1103 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 1104 "\xc0\xbf")); 1105 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1106 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 1107 "\xc1\x80")); 1108 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1109 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), 1110 "\xc1\xbf")); 1111 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1112 ConvertUTFResultContainer(sourceIllegal) 1113 .withScalars(0xfffd, 0xfffd, 0xfffd), 1114 "\xe0\x9f\xbf")); 1115 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1116 ConvertUTFResultContainer(sourceIllegal) 1117 .withScalars(0xfffd, 0xfffd, 0xfffd), 1118 "\xed\xa0\x80")); 1119 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1120 ConvertUTFResultContainer(sourceIllegal) 1121 .withScalars(0xfffd, 0xfffd, 0xfffd), 1122 "\xed\xbf\xbf")); 1123 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1124 ConvertUTFResultContainer(sourceIllegal) 1125 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 1126 
"\xf0\x8f\x80\x80")); 1127 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1128 ConvertUTFResultContainer(sourceIllegal) 1129 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), 1130 "\xf0\x8f\xbf\xbf")); 1131 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1132 ConvertUTFResultContainer(sourceIllegal) 1133 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1134 "\xf8\x87\xbf\xbf\xbf")); 1135 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1136 ConvertUTFResultContainer(sourceIllegal) 1137 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1138 "\xfc\x83\xbf\xbf\xbf\xbf")); 1139 1140 // 1141 // Isolated surrogates 1142 // 1143 1144 // Unicode 6.3.0: 1145 // 1146 // D71. High-surrogate code point: A Unicode code point in the range 1147 // U+D800 to U+DBFF. 1148 // 1149 // D73. Low-surrogate code point: A Unicode code point in the range 1150 // U+DC00 to U+DFFF. 1151 1152 // Note: U+E0100 is <DB40 DD00> in UTF16. 1153 1154 // High surrogates 1155 1156 // U+D800 1157 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1158 ConvertUTFResultContainer(sourceIllegal) 1159 .withScalars(0xfffd, 0xfffd, 0xfffd), 1160 "\xed\xa0\x80")); 1161 1162 // U+DB40 1163 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1164 ConvertUTFResultContainer(sourceIllegal) 1165 .withScalars(0xfffd, 0xfffd, 0xfffd), 1166 "\xed\xac\xa0")); 1167 1168 // U+DBFF 1169 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1170 ConvertUTFResultContainer(sourceIllegal) 1171 .withScalars(0xfffd, 0xfffd, 0xfffd), 1172 "\xed\xaf\xbf")); 1173 1174 // Low surrogates 1175 1176 // U+DC00 1177 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1178 ConvertUTFResultContainer(sourceIllegal) 1179 .withScalars(0xfffd, 0xfffd, 0xfffd), 1180 "\xed\xb0\x80")); 1181 1182 // U+DD00 1183 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1184 ConvertUTFResultContainer(sourceIllegal) 1185 .withScalars(0xfffd, 0xfffd, 0xfffd), 1186 "\xed\xb4\x80")); 1187 1188 // U+DFFF 1189 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1190 
ConvertUTFResultContainer(sourceIllegal) 1191 .withScalars(0xfffd, 0xfffd, 0xfffd), 1192 "\xed\xbf\xbf")); 1193 1194 // Surrogate pairs 1195 1196 // U+D800 U+DC00 1197 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1198 ConvertUTFResultContainer(sourceIllegal) 1199 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1200 "\xed\xa0\x80\xed\xb0\x80")); 1201 1202 // U+D800 U+DD00 1203 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1204 ConvertUTFResultContainer(sourceIllegal) 1205 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1206 "\xed\xa0\x80\xed\xb4\x80")); 1207 1208 // U+D800 U+DFFF 1209 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1210 ConvertUTFResultContainer(sourceIllegal) 1211 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1212 "\xed\xa0\x80\xed\xbf\xbf")); 1213 1214 // U+DB40 U+DC00 1215 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1216 ConvertUTFResultContainer(sourceIllegal) 1217 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1218 "\xed\xac\xa0\xed\xb0\x80")); 1219 1220 // U+DB40 U+DD00 1221 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1222 ConvertUTFResultContainer(sourceIllegal) 1223 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1224 "\xed\xac\xa0\xed\xb4\x80")); 1225 1226 // U+DB40 U+DFFF 1227 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1228 ConvertUTFResultContainer(sourceIllegal) 1229 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1230 "\xed\xac\xa0\xed\xbf\xbf")); 1231 1232 // U+DBFF U+DC00 1233 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1234 ConvertUTFResultContainer(sourceIllegal) 1235 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1236 "\xed\xaf\xbf\xed\xb0\x80")); 1237 1238 // U+DBFF U+DD00 1239 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1240 ConvertUTFResultContainer(sourceIllegal) 1241 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1242 "\xed\xaf\xbf\xed\xb4\x80")); 1243 1244 // U+DBFF U+DFFF 1245 
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1246 ConvertUTFResultContainer(sourceIllegal) 1247 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), 1248 "\xed\xaf\xbf\xed\xbf\xbf")); 1249 1250 // 1251 // Noncharacters 1252 // 1253 1254 // Unicode 6.3.0: 1255 // 1256 // D14. Noncharacter: A code point that is permanently reserved for 1257 // internal use and that should never be interchanged. Noncharacters 1258 // consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10) 1259 // and the values U+FDD0..U+FDEF. 1260 1261 // U+FFFE 1262 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1263 ConvertUTFResultContainer(conversionOK).withScalars(0xfffe), 1264 "\xef\xbf\xbe")); 1265 1266 // U+FFFF 1267 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1268 ConvertUTFResultContainer(conversionOK).withScalars(0xffff), 1269 "\xef\xbf\xbf")); 1270 1271 // U+1FFFE 1272 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1273 ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe), 1274 "\xf0\x9f\xbf\xbe")); 1275 1276 // U+1FFFF 1277 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1278 ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff), 1279 "\xf0\x9f\xbf\xbf")); 1280 1281 // U+2FFFE 1282 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1283 ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe), 1284 "\xf0\xaf\xbf\xbe")); 1285 1286 // U+2FFFF 1287 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1288 ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff), 1289 "\xf0\xaf\xbf\xbf")); 1290 1291 // U+3FFFE 1292 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1293 ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe), 1294 "\xf0\xbf\xbf\xbe")); 1295 1296 // U+3FFFF 1297 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1298 ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff), 1299 "\xf0\xbf\xbf\xbf")); 1300 1301 // U+4FFFE 1302 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1303 ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe), 1304 
"\xf1\x8f\xbf\xbe")); 1305 1306 // U+4FFFF 1307 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1308 ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff), 1309 "\xf1\x8f\xbf\xbf")); 1310 1311 // U+5FFFE 1312 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1313 ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe), 1314 "\xf1\x9f\xbf\xbe")); 1315 1316 // U+5FFFF 1317 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1318 ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff), 1319 "\xf1\x9f\xbf\xbf")); 1320 1321 // U+6FFFE 1322 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1323 ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe), 1324 "\xf1\xaf\xbf\xbe")); 1325 1326 // U+6FFFF 1327 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1328 ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff), 1329 "\xf1\xaf\xbf\xbf")); 1330 1331 // U+7FFFE 1332 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1333 ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe), 1334 "\xf1\xbf\xbf\xbe")); 1335 1336 // U+7FFFF 1337 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1338 ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff), 1339 "\xf1\xbf\xbf\xbf")); 1340 1341 // U+8FFFE 1342 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1343 ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe), 1344 "\xf2\x8f\xbf\xbe")); 1345 1346 // U+8FFFF 1347 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1348 ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff), 1349 "\xf2\x8f\xbf\xbf")); 1350 1351 // U+9FFFE 1352 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1353 ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe), 1354 "\xf2\x9f\xbf\xbe")); 1355 1356 // U+9FFFF 1357 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1358 ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff), 1359 "\xf2\x9f\xbf\xbf")); 1360 1361 // U+AFFFE 1362 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1363 ConvertUTFResultContainer(conversionOK).withScalars(0xafffe), 1364 
"\xf2\xaf\xbf\xbe")); 1365 1366 // U+AFFFF 1367 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1368 ConvertUTFResultContainer(conversionOK).withScalars(0xaffff), 1369 "\xf2\xaf\xbf\xbf")); 1370 1371 // U+BFFFE 1372 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1373 ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe), 1374 "\xf2\xbf\xbf\xbe")); 1375 1376 // U+BFFFF 1377 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1378 ConvertUTFResultContainer(conversionOK).withScalars(0xbffff), 1379 "\xf2\xbf\xbf\xbf")); 1380 1381 // U+CFFFE 1382 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1383 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe), 1384 "\xf3\x8f\xbf\xbe")); 1385 1386 // U+CFFFF 1387 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1388 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF), 1389 "\xf3\x8f\xbf\xbf")); 1390 1391 // U+DFFFE 1392 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1393 ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe), 1394 "\xf3\x9f\xbf\xbe")); 1395 1396 // U+DFFFF 1397 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1398 ConvertUTFResultContainer(conversionOK).withScalars(0xdffff), 1399 "\xf3\x9f\xbf\xbf")); 1400 1401 // U+EFFFE 1402 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1403 ConvertUTFResultContainer(conversionOK).withScalars(0xefffe), 1404 "\xf3\xaf\xbf\xbe")); 1405 1406 // U+EFFFF 1407 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1408 ConvertUTFResultContainer(conversionOK).withScalars(0xeffff), 1409 "\xf3\xaf\xbf\xbf")); 1410 1411 // U+FFFFE 1412 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1413 ConvertUTFResultContainer(conversionOK).withScalars(0xffffe), 1414 "\xf3\xbf\xbf\xbe")); 1415 1416 // U+FFFFF 1417 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1418 ConvertUTFResultContainer(conversionOK).withScalars(0xfffff), 1419 "\xf3\xbf\xbf\xbf")); 1420 1421 // U+10FFFE 1422 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1423 ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe), 1424 
"\xf4\x8f\xbf\xbe")); 1425 1426 // U+10FFFF 1427 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1428 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff), 1429 "\xf4\x8f\xbf\xbf")); 1430 1431 // U+FDD0 1432 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1433 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0), 1434 "\xef\xb7\x90")); 1435 1436 // U+FDD1 1437 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1438 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1), 1439 "\xef\xb7\x91")); 1440 1441 // U+FDD2 1442 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1443 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2), 1444 "\xef\xb7\x92")); 1445 1446 // U+FDD3 1447 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1448 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3), 1449 "\xef\xb7\x93")); 1450 1451 // U+FDD4 1452 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1453 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4), 1454 "\xef\xb7\x94")); 1455 1456 // U+FDD5 1457 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1458 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5), 1459 "\xef\xb7\x95")); 1460 1461 // U+FDD6 1462 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1463 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6), 1464 "\xef\xb7\x96")); 1465 1466 // U+FDD7 1467 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1468 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7), 1469 "\xef\xb7\x97")); 1470 1471 // U+FDD8 1472 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1473 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8), 1474 "\xef\xb7\x98")); 1475 1476 // U+FDD9 1477 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1478 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9), 1479 "\xef\xb7\x99")); 1480 1481 // U+FDDA 1482 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1483 ConvertUTFResultContainer(conversionOK).withScalars(0xfdda), 1484 "\xef\xb7\x9a")); 1485 1486 // U+FDDB 1487 
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1488 ConvertUTFResultContainer(conversionOK).withScalars(0xfddb), 1489 "\xef\xb7\x9b")); 1490 1491 // U+FDDC 1492 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1493 ConvertUTFResultContainer(conversionOK).withScalars(0xfddc), 1494 "\xef\xb7\x9c")); 1495 1496 // U+FDDD 1497 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1498 ConvertUTFResultContainer(conversionOK).withScalars(0xfddd), 1499 "\xef\xb7\x9d")); 1500 1501 // U+FDDE 1502 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1503 ConvertUTFResultContainer(conversionOK).withScalars(0xfdde), 1504 "\xef\xb7\x9e")); 1505 1506 // U+FDDF 1507 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1508 ConvertUTFResultContainer(conversionOK).withScalars(0xfddf), 1509 "\xef\xb7\x9f")); 1510 1511 // U+FDE0 1512 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1513 ConvertUTFResultContainer(conversionOK).withScalars(0xfde0), 1514 "\xef\xb7\xa0")); 1515 1516 // U+FDE1 1517 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1518 ConvertUTFResultContainer(conversionOK).withScalars(0xfde1), 1519 "\xef\xb7\xa1")); 1520 1521 // U+FDE2 1522 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1523 ConvertUTFResultContainer(conversionOK).withScalars(0xfde2), 1524 "\xef\xb7\xa2")); 1525 1526 // U+FDE3 1527 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1528 ConvertUTFResultContainer(conversionOK).withScalars(0xfde3), 1529 "\xef\xb7\xa3")); 1530 1531 // U+FDE4 1532 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1533 ConvertUTFResultContainer(conversionOK).withScalars(0xfde4), 1534 "\xef\xb7\xa4")); 1535 1536 // U+FDE5 1537 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1538 ConvertUTFResultContainer(conversionOK).withScalars(0xfde5), 1539 "\xef\xb7\xa5")); 1540 1541 // U+FDE6 1542 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1543 ConvertUTFResultContainer(conversionOK).withScalars(0xfde6), 1544 "\xef\xb7\xa6")); 1545 1546 // U+FDE7 1547 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1548 
ConvertUTFResultContainer(conversionOK).withScalars(0xfde7), 1549 "\xef\xb7\xa7")); 1550 1551 // U+FDE8 1552 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1553 ConvertUTFResultContainer(conversionOK).withScalars(0xfde8), 1554 "\xef\xb7\xa8")); 1555 1556 // U+FDE9 1557 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1558 ConvertUTFResultContainer(conversionOK).withScalars(0xfde9), 1559 "\xef\xb7\xa9")); 1560 1561 // U+FDEA 1562 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1563 ConvertUTFResultContainer(conversionOK).withScalars(0xfdea), 1564 "\xef\xb7\xaa")); 1565 1566 // U+FDEB 1567 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1568 ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb), 1569 "\xef\xb7\xab")); 1570 1571 // U+FDEC 1572 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1573 ConvertUTFResultContainer(conversionOK).withScalars(0xfdec), 1574 "\xef\xb7\xac")); 1575 1576 // U+FDED 1577 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1578 ConvertUTFResultContainer(conversionOK).withScalars(0xfded), 1579 "\xef\xb7\xad")); 1580 1581 // U+FDEE 1582 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1583 ConvertUTFResultContainer(conversionOK).withScalars(0xfdee), 1584 "\xef\xb7\xae")); 1585 1586 // U+FDEF 1587 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1588 ConvertUTFResultContainer(conversionOK).withScalars(0xfdef), 1589 "\xef\xb7\xaf")); 1590 1591 // U+FDF0 1592 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1593 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0), 1594 "\xef\xb7\xb0")); 1595 1596 // U+FDF1 1597 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1598 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1), 1599 "\xef\xb7\xb1")); 1600 1601 // U+FDF2 1602 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1603 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2), 1604 "\xef\xb7\xb2")); 1605 1606 // U+FDF3 1607 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1608 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3), 1609 
"\xef\xb7\xb3")); 1610 1611 // U+FDF4 1612 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1613 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4), 1614 "\xef\xb7\xb4")); 1615 1616 // U+FDF5 1617 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1618 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5), 1619 "\xef\xb7\xb5")); 1620 1621 // U+FDF6 1622 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1623 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6), 1624 "\xef\xb7\xb6")); 1625 1626 // U+FDF7 1627 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1628 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7), 1629 "\xef\xb7\xb7")); 1630 1631 // U+FDF8 1632 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1633 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8), 1634 "\xef\xb7\xb8")); 1635 1636 // U+FDF9 1637 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1638 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9), 1639 "\xef\xb7\xb9")); 1640 1641 // U+FDFA 1642 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1643 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa), 1644 "\xef\xb7\xba")); 1645 1646 // U+FDFB 1647 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1648 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb), 1649 "\xef\xb7\xbb")); 1650 1651 // U+FDFC 1652 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1653 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc), 1654 "\xef\xb7\xbc")); 1655 1656 // U+FDFD 1657 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1658 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd), 1659 "\xef\xb7\xbd")); 1660 1661 // U+FDFE 1662 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1663 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe), 1664 "\xef\xb7\xbe")); 1665 1666 // U+FDFF 1667 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( 1668 ConvertUTFResultContainer(conversionOK).withScalars(0xfdff), 1669 "\xef\xb7\xbf")); 1670 } 1671 1672 TEST(ConvertUTFTest, 
UTF8ToUTF32PartialLenient) {
  // Checks one truncated input: the conversion is expected to report
  // sourceExhausted and to produce no scalars. The trailing `true` is passed
  // through to the checker unchanged; presumably it selects partial-input
  // mode (NOTE(review): confirm against the helper's declaration).
  auto IsExhausted = [](StringRef Input) {
    return CheckConvertUTF8ToUnicodeScalars(
        ConvertUTFResultContainer(sourceExhausted), Input, true);
  };

  // U+0041 LATIN CAPITAL LETTER A
  // A complete sequence is expected to convert successfully.
  EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
      ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41",
      true));

  //
  // Sequences with one continuation byte missing
  //

  // Two-byte lead bytes with nothing after them.
  EXPECT_TRUE(IsExhausted("\xc2"));
  EXPECT_TRUE(IsExhausted("\xdf"));

  // Three-byte sequences cut off before their final byte.
  EXPECT_TRUE(IsExhausted("\xe0\xa0"));
  EXPECT_TRUE(IsExhausted("\xe0\xbf"));
  EXPECT_TRUE(IsExhausted("\xe1\x80"));
  EXPECT_TRUE(IsExhausted("\xec\xbf"));
  EXPECT_TRUE(IsExhausted("\xed\x80"));
  EXPECT_TRUE(IsExhausted("\xed\x9f"));
  EXPECT_TRUE(IsExhausted("\xee\x80"));
  EXPECT_TRUE(IsExhausted("\xef\xbf"));

  // Four-byte sequences cut off before their final byte.
  EXPECT_TRUE(IsExhausted("\xf0\x90\x80"));
  EXPECT_TRUE(IsExhausted("\xf0\xbf\xbf"));
  EXPECT_TRUE(IsExhausted("\xf1\x80\x80"));
  EXPECT_TRUE(IsExhausted("\xf3\xbf\xbf"));
  EXPECT_TRUE(IsExhausted("\xf4\x80\x80"));
  EXPECT_TRUE(IsExhausted("\xf4\x8f\xbf"));

  // A complete scalar followed by a truncated sequence: the scalar decoded
  // before the cut is still expected in the output, alongside sourceExhausted.
  EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
      ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041),
      "\x41\xc2", true));
}