xref: /llvm-project/llvm/unittests/Support/ConvertUTFTest.cpp (revision eb6e7e8f89a3d5c1cbc9856774ca00208753fb12)
1 //===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Support/ConvertUTF.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "gtest/gtest.h"
12 #include <string>
13 #include <vector>
14 
15 using namespace llvm;
16 
17 TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
18   // Src is the look of disapproval.
19   alignas(UTF16) static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
20   ArrayRef<char> Ref(Src, sizeof(Src) - 1);
21   std::string Result;
22   bool Success = convertUTF16ToUTF8String(Ref, Result);
23   EXPECT_TRUE(Success);
24   std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
25   EXPECT_EQ(Expected, Result);
26 }
27 
28 TEST(ConvertUTFTest, ConvertUTF32LittleEndianToUTF8String) {
29   // Src is the look of disapproval.
30   alignas(UTF32) static const char Src[] =
31       "\xFF\xFE\x00\x00\xA0\x0C\x00\x00\x5F\x00\x00\x00\xA0\x0C\x00\x00";
32   ArrayRef<char> Ref(Src, sizeof(Src) - 1);
33   std::string Result;
34   bool Success = convertUTF32ToUTF8String(Ref, Result);
35   EXPECT_TRUE(Success);
36   std::string Expected("\xE0\xB2\xA0_\xE0\xB2\xA0");
37   EXPECT_EQ(Expected, Result);
38 }
39 
40 TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
41   // Src is the look of disapproval.
42   alignas(UTF16) static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
43   ArrayRef<char> Ref(Src, sizeof(Src) - 1);
44   std::string Result;
45   bool Success = convertUTF16ToUTF8String(Ref, Result);
46   EXPECT_TRUE(Success);
47   std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
48   EXPECT_EQ(Expected, Result);
49 }
50 
51 TEST(ConvertUTFTest, ConvertUTF32BigEndianToUTF8String) {
52   // Src is the look of disapproval.
53   alignas(UTF32) static const char Src[] =
54       "\x00\x00\xFE\xFF\x00\x00\x0C\xA0\x00\x00\x00\x5F\x00\x00\x0C\xA0";
55   ArrayRef<char> Ref(Src, sizeof(Src) - 1);
56   std::string Result;
57   bool Success = convertUTF32ToUTF8String(Ref, Result);
58   EXPECT_TRUE(Success);
59   std::string Expected("\xE0\xB2\xA0_\xE0\xB2\xA0");
60   EXPECT_EQ(Expected, Result);
61 }
62 
63 TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
64   // Src is the look of disapproval.
65   static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
66   StringRef Ref(Src, sizeof(Src) - 1);
67   SmallVector<UTF16, 5> Result;
68   bool Success = convertUTF8ToUTF16String(Ref, Result);
69   EXPECT_TRUE(Success);
70   static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0};
71   ASSERT_EQ(3u, Result.size());
72   for (int I = 0, E = 3; I != E; ++I)
73     EXPECT_EQ(Expected[I], Result[I]);
74 }
75 
76 TEST(ConvertUTFTest, OddLengthInput) {
77   std::string Result;
78   bool Success = convertUTF16ToUTF8String(ArrayRef("xxxxx", 5), Result);
79   EXPECT_FALSE(Success);
80 }
81 
82 TEST(ConvertUTFTest, Empty) {
83   std::string Result;
84   bool Success = convertUTF16ToUTF8String(llvm::ArrayRef<char>(), Result);
85   EXPECT_TRUE(Success);
86   EXPECT_TRUE(Result.empty());
87 }
88 
89 TEST(ConvertUTFTest, HasUTF16BOM) {
90   bool HasBOM = hasUTF16ByteOrderMark(ArrayRef("\xff\xfe", 2));
91   EXPECT_TRUE(HasBOM);
92   HasBOM = hasUTF16ByteOrderMark(ArrayRef("\xfe\xff", 2));
93   EXPECT_TRUE(HasBOM);
94   HasBOM = hasUTF16ByteOrderMark(ArrayRef("\xfe\xff ", 3));
95   EXPECT_TRUE(HasBOM); // Don't care about odd lengths.
96   HasBOM = hasUTF16ByteOrderMark(ArrayRef("\xfe\xff\x00asdf", 6));
97   EXPECT_TRUE(HasBOM);
98 
99   HasBOM = hasUTF16ByteOrderMark({});
100   EXPECT_FALSE(HasBOM);
101   HasBOM = hasUTF16ByteOrderMark(ArrayRef("\xfe", 1));
102   EXPECT_FALSE(HasBOM);
103 }
104 
105 TEST(ConvertUTFTest, UTF16WrappersForConvertUTF16ToUTF8String) {
106   // Src is the look of disapproval.
107   alignas(UTF16) static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
108   ArrayRef<UTF16> SrcRef = ArrayRef((const UTF16 *)Src, 4);
109   std::string Result;
110   bool Success = convertUTF16ToUTF8String(SrcRef, Result);
111   EXPECT_TRUE(Success);
112   std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
113   EXPECT_EQ(Expected, Result);
114 }
115 
116 TEST(ConvertUTFTest, ConvertUTF8toWide) {
117   // Src is the look of disapproval.
118   static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
119   std::wstring Result;
120   bool Success = ConvertUTF8toWide((const char*)Src, Result);
121   EXPECT_TRUE(Success);
122   std::wstring Expected(L"\x0ca0_\x0ca0");
123   EXPECT_EQ(Expected, Result);
124   Result.clear();
125   Success = ConvertUTF8toWide(StringRef(Src, 7), Result);
126   EXPECT_TRUE(Success);
127   EXPECT_EQ(Expected, Result);
128 }
129 
130 TEST(ConvertUTFTest, convertWideToUTF8) {
131   // Src is the look of disapproval.
132   static const wchar_t Src[] = L"\x0ca0_\x0ca0";
133   std::string Result;
134   bool Success = convertWideToUTF8(Src, Result);
135   EXPECT_TRUE(Success);
136   std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
137   EXPECT_EQ(Expected, Result);
138 }
139 
140 struct ConvertUTFResultContainer {
141   ConversionResult ErrorCode;
142   std::vector<unsigned> UnicodeScalars;
143 
144   ConvertUTFResultContainer(ConversionResult ErrorCode)
145       : ErrorCode(ErrorCode) {}
146 
147   ConvertUTFResultContainer
148   withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000,
149               unsigned US2 = 0x110000, unsigned US3 = 0x110000,
150               unsigned US4 = 0x110000, unsigned US5 = 0x110000,
151               unsigned US6 = 0x110000, unsigned US7 = 0x110000) {
152     ConvertUTFResultContainer Result(*this);
153     if (US0 != 0x110000)
154       Result.UnicodeScalars.push_back(US0);
155     if (US1 != 0x110000)
156       Result.UnicodeScalars.push_back(US1);
157     if (US2 != 0x110000)
158       Result.UnicodeScalars.push_back(US2);
159     if (US3 != 0x110000)
160       Result.UnicodeScalars.push_back(US3);
161     if (US4 != 0x110000)
162       Result.UnicodeScalars.push_back(US4);
163     if (US5 != 0x110000)
164       Result.UnicodeScalars.push_back(US5);
165     if (US6 != 0x110000)
166       Result.UnicodeScalars.push_back(US6);
167     if (US7 != 0x110000)
168       Result.UnicodeScalars.push_back(US7);
169     return Result;
170   }
171 };
172 
173 std::pair<ConversionResult, std::vector<unsigned>>
174 ConvertUTF8ToUnicodeScalarsLenient(StringRef S) {
175   const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
176 
177   const UTF8 *SourceNext = SourceStart;
178   std::vector<UTF32> Decoded(S.size(), 0);
179   UTF32 *TargetStart = Decoded.data();
180 
181   auto ErrorCode =
182       ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart,
183                          Decoded.data() + Decoded.size(), lenientConversion);
184 
185   Decoded.resize(TargetStart - Decoded.data());
186 
187   return std::make_pair(ErrorCode, Decoded);
188 }
189 
190 std::pair<ConversionResult, std::vector<unsigned>>
191 ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) {
192   const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
193 
194   const UTF8 *SourceNext = SourceStart;
195   std::vector<UTF32> Decoded(S.size(), 0);
196   UTF32 *TargetStart = Decoded.data();
197 
198   auto ErrorCode = ConvertUTF8toUTF32Partial(
199       &SourceNext, SourceStart + S.size(), &TargetStart,
200       Decoded.data() + Decoded.size(), lenientConversion);
201 
202   Decoded.resize(TargetStart - Decoded.data());
203 
204   return std::make_pair(ErrorCode, Decoded);
205 }
206 
207 ::testing::AssertionResult
208 CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,
209                                  StringRef S, bool Partial = false) {
210   ConversionResult ErrorCode;
211   std::vector<unsigned> Decoded;
212   if (!Partial)
213     std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S);
214   else
215     std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S);
216 
217   if (Expected.ErrorCode != ErrorCode)
218     return ::testing::AssertionFailure() << "Expected error code "
219                                          << Expected.ErrorCode << ", actual "
220                                          << ErrorCode;
221 
222   if (Expected.UnicodeScalars != Decoded)
223     return ::testing::AssertionFailure()
224            << "Expected lenient decoded result:\n"
225            << ::testing::PrintToString(Expected.UnicodeScalars) << "\n"
226            << "Actual result:\n" << ::testing::PrintToString(Decoded);
227 
228   return ::testing::AssertionSuccess();
229 }
230 
231 TEST(ConvertUTFTest, UTF8ToUTF32Lenient) {
232 
233   //
234   // 1-byte sequences
235   //
236 
237   // U+0041 LATIN CAPITAL LETTER A
238   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
239       ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41"));
240 
241   //
242   // 2-byte sequences
243   //
244 
245   // U+0283 LATIN SMALL LETTER ESH
246   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
247       ConvertUTFResultContainer(conversionOK).withScalars(0x0283),
248       "\xca\x83"));
249 
250   // U+03BA GREEK SMALL LETTER KAPPA
251   // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
252   // U+03C3 GREEK SMALL LETTER SIGMA
253   // U+03BC GREEK SMALL LETTER MU
254   // U+03B5 GREEK SMALL LETTER EPSILON
255   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
256       ConvertUTFResultContainer(conversionOK)
257           .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
258       "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));
259 
260   //
261   // 3-byte sequences
262   //
263 
264   // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
265   // U+6587 CJK UNIFIED IDEOGRAPH-6587
266   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
267       ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587),
268       "\xe4\xbe\x8b\xe6\x96\x87"));
269 
270   // U+D55C HANGUL SYLLABLE HAN
271   // U+AE00 HANGUL SYLLABLE GEUL
272   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
273       ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00),
274       "\xed\x95\x9c\xea\xb8\x80"));
275 
276   // U+1112 HANGUL CHOSEONG HIEUH
277   // U+1161 HANGUL JUNGSEONG A
278   // U+11AB HANGUL JONGSEONG NIEUN
279   // U+1100 HANGUL CHOSEONG KIYEOK
280   // U+1173 HANGUL JUNGSEONG EU
281   // U+11AF HANGUL JONGSEONG RIEUL
282   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
283       ConvertUTFResultContainer(conversionOK)
284           .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
285       "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
286       "\xe1\x86\xaf"));
287 
288   //
289   // 4-byte sequences
290   //
291 
292   // U+E0100 VARIATION SELECTOR-17
293   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
294       ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100),
295       "\xf3\xa0\x84\x80"));
296 
297   //
298   // First possible sequence of a certain length
299   //
300 
301   // U+0000 NULL
302   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
303       ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
304       StringRef("\x00", 1)));
305 
306   // U+0080 PADDING CHARACTER
307   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
308       ConvertUTFResultContainer(conversionOK).withScalars(0x0080),
309       "\xc2\x80"));
310 
311   // U+0800 SAMARITAN LETTER ALAF
312   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
313       ConvertUTFResultContainer(conversionOK).withScalars(0x0800),
314       "\xe0\xa0\x80"));
315 
316   // U+10000 LINEAR B SYLLABLE B008 A
317   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
318       ConvertUTFResultContainer(conversionOK).withScalars(0x10000),
319       "\xf0\x90\x80\x80"));
320 
321   // U+200000 (invalid)
322   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
323       ConvertUTFResultContainer(sourceIllegal)
324           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
325       "\xf8\x88\x80\x80\x80"));
326 
327   // U+4000000 (invalid)
328   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
329       ConvertUTFResultContainer(sourceIllegal)
330           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
331       "\xfc\x84\x80\x80\x80\x80"));
332 
333   //
334   // Last possible sequence of a certain length
335   //
336 
337   // U+007F DELETE
338   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
339       ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f"));
340 
341   // U+07FF (unassigned)
342   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
343       ConvertUTFResultContainer(conversionOK).withScalars(0x07ff),
344       "\xdf\xbf"));
345 
346   // U+FFFF (noncharacter)
347   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
348       ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
349       "\xef\xbf\xbf"));
350 
351   // U+1FFFFF (invalid)
352   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
353       ConvertUTFResultContainer(sourceIllegal)
354           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
355       "\xf7\xbf\xbf\xbf"));
356 
357   // U+3FFFFFF (invalid)
358   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
359       ConvertUTFResultContainer(sourceIllegal)
360           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
361       "\xfb\xbf\xbf\xbf\xbf"));
362 
363   // U+7FFFFFFF (invalid)
364   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
365       ConvertUTFResultContainer(sourceIllegal)
366           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
367       "\xfd\xbf\xbf\xbf\xbf\xbf"));
368 
369   //
370   // Other boundary conditions
371   //
372 
373   // U+D7FF (unassigned)
374   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
375       ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff),
376       "\xed\x9f\xbf"));
377 
378   // U+E000 (private use)
379   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
380       ConvertUTFResultContainer(conversionOK).withScalars(0xe000),
381       "\xee\x80\x80"));
382 
383   // U+FFFD REPLACEMENT CHARACTER
384   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
385       ConvertUTFResultContainer(conversionOK).withScalars(0xfffd),
386       "\xef\xbf\xbd"));
387 
388   // U+10FFFF (noncharacter)
389   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
390       ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
391       "\xf4\x8f\xbf\xbf"));
392 
393   // U+110000 (invalid)
394   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
395       ConvertUTFResultContainer(sourceIllegal)
396           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
397       "\xf4\x90\x80\x80"));
398 
399   //
400   // Unexpected continuation bytes
401   //
402 
403   // A sequence of unexpected continuation bytes that don't follow a first
404   // byte, every byte is a maximal subpart.
405 
406   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
407       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80"));
408   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
409       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf"));
410   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
411       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
412       "\x80\x80"));
413   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
414       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
415       "\x80\xbf"));
416   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
417       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
418       "\xbf\x80"));
419   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
420       ConvertUTFResultContainer(sourceIllegal)
421           .withScalars(0xfffd, 0xfffd, 0xfffd),
422       "\x80\xbf\x80"));
423   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
424       ConvertUTFResultContainer(sourceIllegal)
425           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
426       "\x80\xbf\x80\xbf"));
427   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
428       ConvertUTFResultContainer(sourceIllegal)
429           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
430       "\x80\xbf\x82\xbf\xaa"));
431   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
432       ConvertUTFResultContainer(sourceIllegal)
433           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
434       "\xaa\xb0\xbb\xbf\xaa\xa0"));
435   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
436       ConvertUTFResultContainer(sourceIllegal)
437           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
438       "\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));
439 
440   // All continuation bytes (0x80--0xbf).
441   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
442       ConvertUTFResultContainer(sourceIllegal)
443           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
444                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
445           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
446                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
447           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
448                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
449           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
450                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
451           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
452                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
453           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
454                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
455           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
456                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
457           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
458                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
459       "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
460       "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
461       "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
462       "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));
463 
464   //
465   // Lonely start bytes
466   //
467 
468   // Start bytes of 2-byte sequences (0xc0--0xdf).
469   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
470       ConvertUTFResultContainer(sourceIllegal)
471           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
472                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
473           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
474                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
475           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
476                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
477           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
478                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
479       "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
480       "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));
481 
482   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
483       ConvertUTFResultContainer(sourceIllegal)
484           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
485                        0xfffd, 0x0020, 0xfffd, 0x0020)
486           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
487                        0xfffd, 0x0020, 0xfffd, 0x0020)
488           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
489                        0xfffd, 0x0020, 0xfffd, 0x0020)
490           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
491                        0xfffd, 0x0020, 0xfffd, 0x0020)
492           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
493                        0xfffd, 0x0020, 0xfffd, 0x0020)
494           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
495                        0xfffd, 0x0020, 0xfffd, 0x0020)
496           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
497                        0xfffd, 0x0020, 0xfffd, 0x0020)
498           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
499                        0xfffd, 0x0020, 0xfffd, 0x0020),
500       "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
501       "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
502       "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
503       "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));
504 
505   // Start bytes of 3-byte sequences (0xe0--0xef).
506   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
507       ConvertUTFResultContainer(sourceIllegal)
508           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
509                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
510           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
511                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
512       "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));
513 
514   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
515       ConvertUTFResultContainer(sourceIllegal)
516           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
517                        0xfffd, 0x0020, 0xfffd, 0x0020)
518           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
519                        0xfffd, 0x0020, 0xfffd, 0x0020)
520           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
521                        0xfffd, 0x0020, 0xfffd, 0x0020)
522           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
523                        0xfffd, 0x0020, 0xfffd, 0x0020),
524       "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
525       "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));
526 
527   // Start bytes of 4-byte sequences (0xf0--0xf7).
528   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
529       ConvertUTFResultContainer(sourceIllegal)
530           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
531                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
532       "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));
533 
534   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
535       ConvertUTFResultContainer(sourceIllegal)
536           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
537                        0xfffd, 0x0020, 0xfffd, 0x0020)
538           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
539                        0xfffd, 0x0020, 0xfffd, 0x0020),
540       "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));
541 
542   // Start bytes of 5-byte sequences (0xf8--0xfb).
543   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
544       ConvertUTFResultContainer(sourceIllegal)
545           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
546       "\xf8\xf9\xfa\xfb"));
547 
548   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
549       ConvertUTFResultContainer(sourceIllegal)
550           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
551                        0xfffd, 0x0020, 0xfffd, 0x0020),
552       "\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));
553 
554   // Start bytes of 6-byte sequences (0xfc--0xfd).
555   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
556       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
557       "\xfc\xfd"));
558 
559   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
560       ConvertUTFResultContainer(sourceIllegal)
561           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
562       "\xfc\x20\xfd\x20"));
563 
564   //
565   // Other bytes (0xc0--0xc1, 0xfe--0xff).
566   //
567 
568   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
569       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0"));
570   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
571       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1"));
572   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
573       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe"));
574   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
575       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff"));
576 
577   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
578       ConvertUTFResultContainer(sourceIllegal)
579           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
580       "\xc0\xc1\xfe\xff"));
581 
582   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
583       ConvertUTFResultContainer(sourceIllegal)
584           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
585       "\xfe\xfe\xff\xff"));
586 
587   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
588       ConvertUTFResultContainer(sourceIllegal)
589           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
590       "\xfe\x80\x80\x80\x80\x80"));
591 
592   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
593       ConvertUTFResultContainer(sourceIllegal)
594           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
595       "\xff\x80\x80\x80\x80\x80"));
596 
597   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
598       ConvertUTFResultContainer(sourceIllegal)
599           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
600                        0xfffd, 0x0020, 0xfffd, 0x0020),
601       "\xc0\x20\xc1\x20\xfe\x20\xff\x20"));
602 
603   //
604   // Sequences with one continuation byte missing
605   //
606 
607   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
608       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2"));
609   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
610       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf"));
611   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
612       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
613       "\xe0\xa0"));
614   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
615       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
616       "\xe0\xbf"));
617   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
618       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
619       "\xe1\x80"));
620   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
621       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
622       "\xec\xbf"));
623   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
624       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
625       "\xed\x80"));
626   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
627       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
628       "\xed\x9f"));
629   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
630       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
631       "\xee\x80"));
632   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
633       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
634       "\xef\xbf"));
635   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
636       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
637       "\xf0\x90\x80"));
638   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
639       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
640       "\xf0\xbf\xbf"));
641   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
642       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
643       "\xf1\x80\x80"));
644   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
645       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
646       "\xf3\xbf\xbf"));
647   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
648       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
649       "\xf4\x80\x80"));
650   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
651       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
652       "\xf4\x8f\xbf"));
653 
654   // Overlong sequences with one trailing byte missing.
655   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
656       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
657       "\xc0"));
658   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
659       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
660       "\xc1"));
661   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
662       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
663       "\xe0\x80"));
664   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
665       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
666       "\xe0\x9f"));
667   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
668       ConvertUTFResultContainer(sourceIllegal)
669           .withScalars(0xfffd, 0xfffd, 0xfffd),
670       "\xf0\x80\x80"));
671   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
672       ConvertUTFResultContainer(sourceIllegal)
673           .withScalars(0xfffd, 0xfffd, 0xfffd),
674       "\xf0\x8f\x80"));
675   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
676       ConvertUTFResultContainer(sourceIllegal)
677           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
678       "\xf8\x80\x80\x80"));
679   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
680       ConvertUTFResultContainer(sourceIllegal)
681           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
682       "\xfc\x80\x80\x80\x80"));
683 
684   // Sequences that represent surrogates with one trailing byte missing.
685   // High surrogates
686   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
687       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
688       "\xed\xa0"));
689   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
690       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
691       "\xed\xac"));
692   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
693       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
694       "\xed\xaf"));
695   // Low surrogates
696   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
697       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
698       "\xed\xb0"));
699   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
700       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
701       "\xed\xb4"));
702   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
703       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
704       "\xed\xbf"));
705 
706   // Ill-formed 4-byte sequences.
707   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
708   // U+1100xx (invalid)
709   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
710       ConvertUTFResultContainer(sourceIllegal)
711           .withScalars(0xfffd, 0xfffd, 0xfffd),
712       "\xf4\x90\x80"));
713   // U+13FBxx (invalid)
714   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
715       ConvertUTFResultContainer(sourceIllegal)
716           .withScalars(0xfffd, 0xfffd, 0xfffd),
717       "\xf4\xbf\xbf"));
718   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
719       ConvertUTFResultContainer(sourceIllegal)
720           .withScalars(0xfffd, 0xfffd, 0xfffd),
721       "\xf5\x80\x80"));
722   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
723       ConvertUTFResultContainer(sourceIllegal)
724           .withScalars(0xfffd, 0xfffd, 0xfffd),
725       "\xf6\x80\x80"));
726   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
727       ConvertUTFResultContainer(sourceIllegal)
728           .withScalars(0xfffd, 0xfffd, 0xfffd),
729       "\xf7\x80\x80"));
730   // U+1FFBxx (invalid)
731   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
732       ConvertUTFResultContainer(sourceIllegal)
733           .withScalars(0xfffd, 0xfffd, 0xfffd),
734       "\xf7\xbf\xbf"));
735 
736   // Ill-formed 5-byte sequences.
737   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
738   // U+2000xx (invalid)
739   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
740       ConvertUTFResultContainer(sourceIllegal)
741           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
742       "\xf8\x88\x80\x80"));
743   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
744       ConvertUTFResultContainer(sourceIllegal)
745           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
746       "\xf8\xbf\xbf\xbf"));
747   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
748       ConvertUTFResultContainer(sourceIllegal)
749           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
750       "\xf9\x80\x80\x80"));
751   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
752       ConvertUTFResultContainer(sourceIllegal)
753           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
754       "\xfa\x80\x80\x80"));
755   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
756       ConvertUTFResultContainer(sourceIllegal)
757           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
758       "\xfb\x80\x80\x80"));
759   // U+3FFFFxx (invalid)
760   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
761       ConvertUTFResultContainer(sourceIllegal)
762           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
763       "\xfb\xbf\xbf\xbf"));
764 
765   // Ill-formed 6-byte sequences.
766   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
767   // U+40000xx (invalid)
768   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
769       ConvertUTFResultContainer(sourceIllegal)
770           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
771       "\xfc\x84\x80\x80\x80"));
772   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
773       ConvertUTFResultContainer(sourceIllegal)
774           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
775       "\xfc\xbf\xbf\xbf\xbf"));
776   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
777       ConvertUTFResultContainer(sourceIllegal)
778           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
779       "\xfd\x80\x80\x80\x80"));
780   // U+7FFFFFxx (invalid)
781   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
782       ConvertUTFResultContainer(sourceIllegal)
783           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
784       "\xfd\xbf\xbf\xbf\xbf"));
785 
786   //
787   // Sequences with two continuation bytes missing
788   //
789 
790   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
791       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
792       "\xf0\x90"));
793   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
794       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
795       "\xf0\xbf"));
796   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
797       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
798       "\xf1\x80"));
799   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
800       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
801       "\xf3\xbf"));
802   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
803       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
804       "\xf4\x80"));
805   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
806       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
807       "\xf4\x8f"));
808 
809   // Overlong sequences with two trailing bytes missing.
810   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
811       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0"));
812   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
813       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
814       "\xf0\x80"));
815   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
816       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
817       "\xf0\x8f"));
818   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
819       ConvertUTFResultContainer(sourceIllegal)
820           .withScalars(0xfffd, 0xfffd, 0xfffd),
821       "\xf8\x80\x80"));
822   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
823       ConvertUTFResultContainer(sourceIllegal)
824           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
825       "\xfc\x80\x80\x80"));
826 
827   // Sequences that represent surrogates with two trailing bytes missing.
828   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
829       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed"));
830 
831   // Ill-formed 4-byte sequences.
832   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
833   // U+110yxx (invalid)
834   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
835       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
836       "\xf4\x90"));
837   // U+13Fyxx (invalid)
838   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
839       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
840       "\xf4\xbf"));
841   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
842       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
843       "\xf5\x80"));
844   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
845       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
846       "\xf6\x80"));
847   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
848       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
849       "\xf7\x80"));
850   // U+1FFyxx (invalid)
851   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
852       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
853       "\xf7\xbf"));
854 
855   // Ill-formed 5-byte sequences.
856   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
857   // U+200yxx (invalid)
858   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
859       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
860       "\xf8\x88\x80"));
861   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
862       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
863       "\xf8\xbf\xbf"));
864   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
865       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
866       "\xf9\x80\x80"));
867   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
868       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
869       "\xfa\x80\x80"));
870   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
871       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
872       "\xfb\x80\x80"));
873   // U+3FFFyxx (invalid)
874   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
875       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
876       "\xfb\xbf\xbf"));
877 
878   // Ill-formed 6-byte sequences.
879   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
880   // U+4000yxx (invalid)
881   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
882       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
883       "\xfc\x84\x80\x80"));
884   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
885       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
886       "\xfc\xbf\xbf\xbf"));
887   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
888       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
889       "\xfd\x80\x80\x80"));
890   // U+7FFFFyxx (invalid)
891   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
892       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
893       "\xfd\xbf\xbf\xbf"));
894 
895   //
896   // Sequences with three continuation bytes missing
897   //
898 
899   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
900       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
901   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
902       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1"));
903   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
904       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2"));
905   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
906       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3"));
907   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
908       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4"));
909 
910   // Broken overlong sequences.
911   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
912       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
913   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
914       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
915       "\xf8\x80"));
916   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
917       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
918       "\xfc\x80\x80"));
919 
920   // Ill-formed 4-byte sequences.
921   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
922   // U+14yyxx (invalid)
923   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
924       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5"));
925   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
926       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6"));
927   // U+1Cyyxx (invalid)
928   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
929       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7"));
930 
931   // Ill-formed 5-byte sequences.
932   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
933   // U+20yyxx (invalid)
934   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
935       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
936       "\xf8\x88"));
937   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
938       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
939       "\xf8\xbf"));
940   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
941       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
942       "\xf9\x80"));
943   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
944       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
945       "\xfa\x80"));
946   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
947       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
948       "\xfb\x80"));
949   // U+3FCyyxx (invalid)
950   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
951       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
952       "\xfb\xbf"));
953 
954   // Ill-formed 6-byte sequences.
955   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
956   // U+400yyxx (invalid)
957   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
958       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
959       "\xfc\x84\x80"));
960   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
961       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
962       "\xfc\xbf\xbf"));
963   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
964       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
965       "\xfd\x80\x80"));
966   // U+7FFCyyxx (invalid)
967   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
968       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
969       "\xfd\xbf\xbf"));
970 
971   //
972   // Sequences with four continuation bytes missing
973   //
974 
975   // Ill-formed 5-byte sequences.
976   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
977   // U+uzyyxx (invalid)
978   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
979       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
980   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
981       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9"));
982   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
983       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa"));
984   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
985       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
986   // U+3zyyxx (invalid)
987   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
988       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
989 
990   // Broken overlong sequences.
991   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
992       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
993   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
994       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
995       "\xfc\x80"));
996 
997   // Ill-formed 6-byte sequences.
998   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
999   // U+uzzyyxx (invalid)
1000   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1001       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1002       "\xfc\x84"));
1003   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1004       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1005       "\xfc\xbf"));
1006   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1007       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1008       "\xfd\x80"));
1009   // U+7Fzzyyxx (invalid)
1010   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1011       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1012       "\xfd\xbf"));
1013 
1014   //
1015   // Sequences with five continuation bytes missing
1016   //
1017 
1018   // Ill-formed 6-byte sequences.
1019   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
1020   // U+uzzyyxx (invalid)
1021   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1022       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc"));
1023   // U+uuzzyyxx (invalid)
1024   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1025       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd"));
1026 
1027   //
1028   // Consecutive sequences with trailing bytes missing
1029   //
1030 
1031   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1032       ConvertUTFResultContainer(sourceIllegal)
1033           .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1034           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1035           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
1036           .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1037           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1038           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1039       "\xc0" "\xe0\x80" "\xf0\x80\x80"
1040       "\xf8\x80\x80\x80"
1041       "\xfc\x80\x80\x80\x80"
1042       "\xdf" "\xef\xbf" "\xf7\xbf\xbf"
1043       "\xfb\xbf\xbf\xbf"
1044       "\xfd\xbf\xbf\xbf\xbf"));
1045 
1046   //
1047   // Overlong UTF-8 sequences
1048   //
1049 
1050   // U+002F SOLIDUS
1051   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1052       ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f"));
1053 
1054   // Overlong sequences of the above.
1055   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1056       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1057       "\xc0\xaf"));
1058   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1059       ConvertUTFResultContainer(sourceIllegal)
1060           .withScalars(0xfffd, 0xfffd, 0xfffd),
1061       "\xe0\x80\xaf"));
1062   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1063       ConvertUTFResultContainer(sourceIllegal)
1064           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1065       "\xf0\x80\x80\xaf"));
1066   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1067       ConvertUTFResultContainer(sourceIllegal)
1068           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1069       "\xf8\x80\x80\x80\xaf"));
1070   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1071       ConvertUTFResultContainer(sourceIllegal)
1072           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1073       "\xfc\x80\x80\x80\x80\xaf"));
1074 
1075   // U+0000 NULL
1076   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1077       ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
1078       StringRef("\x00", 1)));
1079 
1080   // Overlong sequences of the above.
1081   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1082       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1083       "\xc0\x80"));
1084   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1085       ConvertUTFResultContainer(sourceIllegal)
1086           .withScalars(0xfffd, 0xfffd, 0xfffd),
1087       "\xe0\x80\x80"));
1088   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1089       ConvertUTFResultContainer(sourceIllegal)
1090           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1091       "\xf0\x80\x80\x80"));
1092   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1093       ConvertUTFResultContainer(sourceIllegal)
1094           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1095       "\xf8\x80\x80\x80\x80"));
1096   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1097       ConvertUTFResultContainer(sourceIllegal)
1098           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1099       "\xfc\x80\x80\x80\x80\x80"));
1100 
1101   // Other overlong sequences.
1102   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1103       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1104       "\xc0\xbf"));
1105   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1106       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1107       "\xc1\x80"));
1108   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1109       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1110       "\xc1\xbf"));
1111   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1112       ConvertUTFResultContainer(sourceIllegal)
1113           .withScalars(0xfffd, 0xfffd, 0xfffd),
1114       "\xe0\x9f\xbf"));
1115   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1116       ConvertUTFResultContainer(sourceIllegal)
1117           .withScalars(0xfffd, 0xfffd, 0xfffd),
1118       "\xed\xa0\x80"));
1119   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1120       ConvertUTFResultContainer(sourceIllegal)
1121           .withScalars(0xfffd, 0xfffd, 0xfffd),
1122       "\xed\xbf\xbf"));
1123   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1124       ConvertUTFResultContainer(sourceIllegal)
1125           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1126       "\xf0\x8f\x80\x80"));
1127   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1128       ConvertUTFResultContainer(sourceIllegal)
1129           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1130       "\xf0\x8f\xbf\xbf"));
1131   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1132       ConvertUTFResultContainer(sourceIllegal)
1133           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1134       "\xf8\x87\xbf\xbf\xbf"));
1135   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1136       ConvertUTFResultContainer(sourceIllegal)
1137           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1138       "\xfc\x83\xbf\xbf\xbf\xbf"));
1139 
1140   //
1141   // Isolated surrogates
1142   //
1143 
1144   // Unicode 6.3.0:
1145   //
1146   //    D71.  High-surrogate code point: A Unicode code point in the range
1147   //    U+D800 to U+DBFF.
1148   //
1149   //    D73.  Low-surrogate code point: A Unicode code point in the range
1150   //    U+DC00 to U+DFFF.
1151 
1152   // Note: U+E0100 is <DB40 DD00> in UTF16.
1153 
1154   // High surrogates
1155 
1156   // U+D800
1157   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1158       ConvertUTFResultContainer(sourceIllegal)
1159           .withScalars(0xfffd, 0xfffd, 0xfffd),
1160       "\xed\xa0\x80"));
1161 
1162   // U+DB40
1163   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1164       ConvertUTFResultContainer(sourceIllegal)
1165           .withScalars(0xfffd, 0xfffd, 0xfffd),
1166       "\xed\xac\xa0"));
1167 
1168   // U+DBFF
1169   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1170       ConvertUTFResultContainer(sourceIllegal)
1171           .withScalars(0xfffd, 0xfffd, 0xfffd),
1172       "\xed\xaf\xbf"));
1173 
1174   // Low surrogates
1175 
1176   // U+DC00
1177   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1178       ConvertUTFResultContainer(sourceIllegal)
1179           .withScalars(0xfffd, 0xfffd, 0xfffd),
1180       "\xed\xb0\x80"));
1181 
1182   // U+DD00
1183   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1184       ConvertUTFResultContainer(sourceIllegal)
1185           .withScalars(0xfffd, 0xfffd, 0xfffd),
1186       "\xed\xb4\x80"));
1187 
1188   // U+DFFF
1189   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1190       ConvertUTFResultContainer(sourceIllegal)
1191           .withScalars(0xfffd, 0xfffd, 0xfffd),
1192       "\xed\xbf\xbf"));
1193 
1194   // Surrogate pairs
1195 
1196   // U+D800 U+DC00
1197   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1198       ConvertUTFResultContainer(sourceIllegal)
1199           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1200       "\xed\xa0\x80\xed\xb0\x80"));
1201 
1202   // U+D800 U+DD00
1203   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1204       ConvertUTFResultContainer(sourceIllegal)
1205           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1206       "\xed\xa0\x80\xed\xb4\x80"));
1207 
1208   // U+D800 U+DFFF
1209   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1210       ConvertUTFResultContainer(sourceIllegal)
1211           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1212       "\xed\xa0\x80\xed\xbf\xbf"));
1213 
1214   // U+DB40 U+DC00
1215   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1216       ConvertUTFResultContainer(sourceIllegal)
1217           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1218       "\xed\xac\xa0\xed\xb0\x80"));
1219 
1220   // U+DB40 U+DD00
1221   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1222       ConvertUTFResultContainer(sourceIllegal)
1223           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1224       "\xed\xac\xa0\xed\xb4\x80"));
1225 
1226   // U+DB40 U+DFFF
1227   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1228       ConvertUTFResultContainer(sourceIllegal)
1229           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1230       "\xed\xac\xa0\xed\xbf\xbf"));
1231 
1232   // U+DBFF U+DC00
1233   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1234       ConvertUTFResultContainer(sourceIllegal)
1235           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1236       "\xed\xaf\xbf\xed\xb0\x80"));
1237 
1238   // U+DBFF U+DD00
1239   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1240       ConvertUTFResultContainer(sourceIllegal)
1241           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1242       "\xed\xaf\xbf\xed\xb4\x80"));
1243 
1244   // U+DBFF U+DFFF
1245   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1246       ConvertUTFResultContainer(sourceIllegal)
1247           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1248       "\xed\xaf\xbf\xed\xbf\xbf"));
1249 
1250   //
1251   // Noncharacters
1252   //
1253 
1254   // Unicode 6.3.0:
1255   //
1256   //    D14.  Noncharacter: A code point that is permanently reserved for
1257   //    internal use and that should never be interchanged. Noncharacters
1258   //    consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10)
1259   //    and the values U+FDD0..U+FDEF.
1260 
1261   // U+FFFE
1262   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1263       ConvertUTFResultContainer(conversionOK).withScalars(0xfffe),
1264       "\xef\xbf\xbe"));
1265 
1266   // U+FFFF
1267   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1268       ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
1269       "\xef\xbf\xbf"));
1270 
1271   // U+1FFFE
1272   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1273       ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe),
1274       "\xf0\x9f\xbf\xbe"));
1275 
1276   // U+1FFFF
1277   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1278       ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff),
1279       "\xf0\x9f\xbf\xbf"));
1280 
1281   // U+2FFFE
1282   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1283       ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe),
1284       "\xf0\xaf\xbf\xbe"));
1285 
1286   // U+2FFFF
1287   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1288       ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff),
1289       "\xf0\xaf\xbf\xbf"));
1290 
1291   // U+3FFFE
1292   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1293       ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe),
1294       "\xf0\xbf\xbf\xbe"));
1295 
1296   // U+3FFFF
1297   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1298       ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff),
1299       "\xf0\xbf\xbf\xbf"));
1300 
1301   // U+4FFFE
1302   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1303       ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe),
1304       "\xf1\x8f\xbf\xbe"));
1305 
1306   // U+4FFFF
1307   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1308       ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff),
1309       "\xf1\x8f\xbf\xbf"));
1310 
1311   // U+5FFFE
1312   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1313       ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe),
1314       "\xf1\x9f\xbf\xbe"));
1315 
1316   // U+5FFFF
1317   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1318       ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff),
1319       "\xf1\x9f\xbf\xbf"));
1320 
1321   // U+6FFFE
1322   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1323       ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe),
1324       "\xf1\xaf\xbf\xbe"));
1325 
1326   // U+6FFFF
1327   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1328       ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff),
1329       "\xf1\xaf\xbf\xbf"));
1330 
1331   // U+7FFFE
1332   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1333       ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe),
1334       "\xf1\xbf\xbf\xbe"));
1335 
1336   // U+7FFFF
1337   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1338       ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff),
1339       "\xf1\xbf\xbf\xbf"));
1340 
1341   // U+8FFFE
1342   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1343       ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe),
1344       "\xf2\x8f\xbf\xbe"));
1345 
1346   // U+8FFFF
1347   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1348       ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff),
1349       "\xf2\x8f\xbf\xbf"));
1350 
1351   // U+9FFFE
1352   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1353       ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe),
1354       "\xf2\x9f\xbf\xbe"));
1355 
1356   // U+9FFFF
1357   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1358       ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff),
1359       "\xf2\x9f\xbf\xbf"));
1360 
1361   // U+AFFFE
1362   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1363       ConvertUTFResultContainer(conversionOK).withScalars(0xafffe),
1364       "\xf2\xaf\xbf\xbe"));
1365 
1366   // U+AFFFF
1367   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1368       ConvertUTFResultContainer(conversionOK).withScalars(0xaffff),
1369       "\xf2\xaf\xbf\xbf"));
1370 
1371   // U+BFFFE
1372   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1373       ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe),
1374       "\xf2\xbf\xbf\xbe"));
1375 
1376   // U+BFFFF
1377   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1378       ConvertUTFResultContainer(conversionOK).withScalars(0xbffff),
1379       "\xf2\xbf\xbf\xbf"));
1380 
1381   // U+CFFFE
1382   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1383       ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe),
1384       "\xf3\x8f\xbf\xbe"));
1385 
1386   // U+CFFFF
1387   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1388       ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF),
1389       "\xf3\x8f\xbf\xbf"));
1390 
1391   // U+DFFFE
1392   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1393       ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe),
1394       "\xf3\x9f\xbf\xbe"));
1395 
1396   // U+DFFFF
1397   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1398       ConvertUTFResultContainer(conversionOK).withScalars(0xdffff),
1399       "\xf3\x9f\xbf\xbf"));
1400 
1401   // U+EFFFE
1402   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1403       ConvertUTFResultContainer(conversionOK).withScalars(0xefffe),
1404       "\xf3\xaf\xbf\xbe"));
1405 
1406   // U+EFFFF
1407   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1408       ConvertUTFResultContainer(conversionOK).withScalars(0xeffff),
1409       "\xf3\xaf\xbf\xbf"));
1410 
1411   // U+FFFFE
1412   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1413       ConvertUTFResultContainer(conversionOK).withScalars(0xffffe),
1414       "\xf3\xbf\xbf\xbe"));
1415 
1416   // U+FFFFF
1417   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1418       ConvertUTFResultContainer(conversionOK).withScalars(0xfffff),
1419       "\xf3\xbf\xbf\xbf"));
1420 
1421   // U+10FFFE
1422   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1423       ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe),
1424       "\xf4\x8f\xbf\xbe"));
1425 
1426   // U+10FFFF
1427   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1428       ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
1429       "\xf4\x8f\xbf\xbf"));
1430 
1431   // U+FDD0
1432   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1433       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0),
1434       "\xef\xb7\x90"));
1435 
1436   // U+FDD1
1437   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1438       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1),
1439       "\xef\xb7\x91"));
1440 
1441   // U+FDD2
1442   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1443       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2),
1444       "\xef\xb7\x92"));
1445 
1446   // U+FDD3
1447   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1448       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3),
1449       "\xef\xb7\x93"));
1450 
1451   // U+FDD4
1452   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1453       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4),
1454       "\xef\xb7\x94"));
1455 
1456   // U+FDD5
1457   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1458       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5),
1459       "\xef\xb7\x95"));
1460 
1461   // U+FDD6
1462   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1463       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6),
1464       "\xef\xb7\x96"));
1465 
1466   // U+FDD7
1467   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1468       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7),
1469       "\xef\xb7\x97"));
1470 
1471   // U+FDD8
1472   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1473       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8),
1474       "\xef\xb7\x98"));
1475 
1476   // U+FDD9
1477   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1478       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9),
1479       "\xef\xb7\x99"));
1480 
1481   // U+FDDA
1482   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1483       ConvertUTFResultContainer(conversionOK).withScalars(0xfdda),
1484       "\xef\xb7\x9a"));
1485 
1486   // U+FDDB
1487   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1488       ConvertUTFResultContainer(conversionOK).withScalars(0xfddb),
1489       "\xef\xb7\x9b"));
1490 
1491   // U+FDDC
1492   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1493       ConvertUTFResultContainer(conversionOK).withScalars(0xfddc),
1494       "\xef\xb7\x9c"));
1495 
1496   // U+FDDD
1497   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1498       ConvertUTFResultContainer(conversionOK).withScalars(0xfddd),
1499       "\xef\xb7\x9d"));
1500 
1501   // U+FDDE
1502   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1503       ConvertUTFResultContainer(conversionOK).withScalars(0xfdde),
1504       "\xef\xb7\x9e"));
1505 
1506   // U+FDDF
1507   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1508       ConvertUTFResultContainer(conversionOK).withScalars(0xfddf),
1509       "\xef\xb7\x9f"));
1510 
1511   // U+FDE0
1512   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1513       ConvertUTFResultContainer(conversionOK).withScalars(0xfde0),
1514       "\xef\xb7\xa0"));
1515 
1516   // U+FDE1
1517   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1518       ConvertUTFResultContainer(conversionOK).withScalars(0xfde1),
1519       "\xef\xb7\xa1"));
1520 
1521   // U+FDE2
1522   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1523       ConvertUTFResultContainer(conversionOK).withScalars(0xfde2),
1524       "\xef\xb7\xa2"));
1525 
1526   // U+FDE3
1527   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1528       ConvertUTFResultContainer(conversionOK).withScalars(0xfde3),
1529       "\xef\xb7\xa3"));
1530 
1531   // U+FDE4
1532   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1533       ConvertUTFResultContainer(conversionOK).withScalars(0xfde4),
1534       "\xef\xb7\xa4"));
1535 
1536   // U+FDE5
1537   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1538       ConvertUTFResultContainer(conversionOK).withScalars(0xfde5),
1539       "\xef\xb7\xa5"));
1540 
1541   // U+FDE6
1542   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1543       ConvertUTFResultContainer(conversionOK).withScalars(0xfde6),
1544       "\xef\xb7\xa6"));
1545 
1546   // U+FDE7
1547   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1548       ConvertUTFResultContainer(conversionOK).withScalars(0xfde7),
1549       "\xef\xb7\xa7"));
1550 
1551   // U+FDE8
1552   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1553       ConvertUTFResultContainer(conversionOK).withScalars(0xfde8),
1554       "\xef\xb7\xa8"));
1555 
1556   // U+FDE9
1557   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1558       ConvertUTFResultContainer(conversionOK).withScalars(0xfde9),
1559       "\xef\xb7\xa9"));
1560 
1561   // U+FDEA
1562   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1563       ConvertUTFResultContainer(conversionOK).withScalars(0xfdea),
1564       "\xef\xb7\xaa"));
1565 
1566   // U+FDEB
1567   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1568       ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb),
1569       "\xef\xb7\xab"));
1570 
1571   // U+FDEC
1572   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1573       ConvertUTFResultContainer(conversionOK).withScalars(0xfdec),
1574       "\xef\xb7\xac"));
1575 
1576   // U+FDED
1577   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1578       ConvertUTFResultContainer(conversionOK).withScalars(0xfded),
1579       "\xef\xb7\xad"));
1580 
1581   // U+FDEE
1582   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1583       ConvertUTFResultContainer(conversionOK).withScalars(0xfdee),
1584       "\xef\xb7\xae"));
1585 
1586   // U+FDEF
1587   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1588       ConvertUTFResultContainer(conversionOK).withScalars(0xfdef),
1589       "\xef\xb7\xaf"));
1590 
1591   // U+FDF0
1592   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1593       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0),
1594       "\xef\xb7\xb0"));
1595 
1596   // U+FDF1
1597   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1598       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1),
1599       "\xef\xb7\xb1"));
1600 
1601   // U+FDF2
1602   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1603       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2),
1604       "\xef\xb7\xb2"));
1605 
1606   // U+FDF3
1607   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1608       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3),
1609       "\xef\xb7\xb3"));
1610 
1611   // U+FDF4
1612   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1613       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4),
1614       "\xef\xb7\xb4"));
1615 
1616   // U+FDF5
1617   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1618       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5),
1619       "\xef\xb7\xb5"));
1620 
1621   // U+FDF6
1622   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1623       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6),
1624       "\xef\xb7\xb6"));
1625 
1626   // U+FDF7
1627   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1628       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7),
1629       "\xef\xb7\xb7"));
1630 
1631   // U+FDF8
1632   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1633       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8),
1634       "\xef\xb7\xb8"));
1635 
1636   // U+FDF9
1637   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1638       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9),
1639       "\xef\xb7\xb9"));
1640 
1641   // U+FDFA
1642   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1643       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa),
1644       "\xef\xb7\xba"));
1645 
1646   // U+FDFB
1647   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1648       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb),
1649       "\xef\xb7\xbb"));
1650 
1651   // U+FDFC
1652   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1653       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc),
1654       "\xef\xb7\xbc"));
1655 
1656   // U+FDFD
1657   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1658       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd),
1659       "\xef\xb7\xbd"));
1660 
1661   // U+FDFE
1662   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1663       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe),
1664       "\xef\xb7\xbe"));
1665 
1666   // U+FDFF
1667   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1668       ConvertUTFResultContainer(conversionOK).withScalars(0xfdff),
1669       "\xef\xb7\xbf"));
1670 }
1671 
1672 TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) {
1673   // U+0041 LATIN CAPITAL LETTER A
1674   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1675       ConvertUTFResultContainer(conversionOK).withScalars(0x0041),
1676       "\x41", true));
1677 
1678   //
1679   // Sequences with one continuation byte missing
1680   //
1681 
1682   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1683       ConvertUTFResultContainer(sourceExhausted),
1684       "\xc2", true));
1685   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1686       ConvertUTFResultContainer(sourceExhausted),
1687       "\xdf", true));
1688   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1689       ConvertUTFResultContainer(sourceExhausted),
1690       "\xe0\xa0", true));
1691   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1692       ConvertUTFResultContainer(sourceExhausted),
1693       "\xe0\xbf", true));
1694   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1695       ConvertUTFResultContainer(sourceExhausted),
1696       "\xe1\x80", true));
1697   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1698       ConvertUTFResultContainer(sourceExhausted),
1699       "\xec\xbf", true));
1700   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1701       ConvertUTFResultContainer(sourceExhausted),
1702       "\xed\x80", true));
1703   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1704       ConvertUTFResultContainer(sourceExhausted),
1705       "\xed\x9f", true));
1706   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1707       ConvertUTFResultContainer(sourceExhausted),
1708       "\xee\x80", true));
1709   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1710       ConvertUTFResultContainer(sourceExhausted),
1711       "\xef\xbf", true));
1712   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1713       ConvertUTFResultContainer(sourceExhausted),
1714       "\xf0\x90\x80", true));
1715   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1716       ConvertUTFResultContainer(sourceExhausted),
1717       "\xf0\xbf\xbf", true));
1718   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1719       ConvertUTFResultContainer(sourceExhausted),
1720       "\xf1\x80\x80", true));
1721   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1722       ConvertUTFResultContainer(sourceExhausted),
1723       "\xf3\xbf\xbf", true));
1724   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1725       ConvertUTFResultContainer(sourceExhausted),
1726       "\xf4\x80\x80", true));
1727   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1728       ConvertUTFResultContainer(sourceExhausted),
1729       "\xf4\x8f\xbf", true));
1730 
1731   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1732       ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041),
1733       "\x41\xc2", true));
1734 }
1735 
1736