//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8390840fbSDimitrij Mijoski
// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT

// Requires the fix in 390840f.
// XFAIL: using-built-library-before-llvm-18
13390840fbSDimitrij Mijoski
14390840fbSDimitrij Mijoski #include <algorithm>
15390840fbSDimitrij Mijoski #include <cassert>
16390840fbSDimitrij Mijoski #include <codecvt>
17390840fbSDimitrij Mijoski #include <locale>
18390840fbSDimitrij Mijoski
19390840fbSDimitrij Mijoski #include "test_macros.h"
20390840fbSDimitrij Mijoski
// One test case for a conversion that is expected to succeed completely:
// the whole input range is consumed and the whole output range is filled.
struct test_offsets_ok {
  size_t in_size;  // number of input code units handed to the conversion
  size_t out_size; // number of output code units made available to it
};
// One test case for a conversion that is expected to stop early with a
// `partial` result, either because the output range is too small or because
// the input range ends in the middle of a code point.
struct test_offsets_partial {
  size_t in_size;           // number of input code units handed to the conversion
  size_t out_size;          // number of output code units made available to it
  size_t expected_in_next;  // expected offset of the "next input" pointer afterwards
  size_t expected_out_next; // expected offset of the "next output" pointer afterwards
};
31390840fbSDimitrij Mijoski
// One test case for a conversion that is expected to fail with an `error`
// result. Before the conversion runs, the input code unit at `replace_pos` is
// overwritten with `replace_char` to make the input ill-formed.
//
// CharT is the type in which the replacement code unit is stored.
template <class CharT>
struct test_offsets_error {
  size_t in_size;           // number of input code units handed to the conversion
  size_t out_size;          // number of output code units made available to it
  size_t expected_in_next;  // expected offset of the "next input" pointer afterwards
  size_t expected_out_next; // expected offset of the "next output" pointer afterwards
  CharT replace_char;       // code unit written over the input at replace_pos
  size_t replace_pos;       // index of the input code unit that gets overwritten
};
41390840fbSDimitrij Mijoski
// Number of elements in the built-in array `x`. Kept as a macro so that the
// result can be used in static_assert and as an array bound.
#define array_size(x) (sizeof(x) / sizeof((x)[0]))
43390840fbSDimitrij Mijoski
// Standard-library names that the tests below use unqualified.
using std::begin;
using std::char_traits;
using std::codecvt_base;
using std::copy;
using std::end;
49390840fbSDimitrij Mijoski
50390840fbSDimitrij Mijoski template <class InternT, class ExternT>
utf8_to_utf32_in_ok(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)51390840fbSDimitrij Mijoski void utf8_to_utf32_in_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
52390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
53390840fbSDimitrij Mijoski const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
54390840fbSDimitrij Mijoski const char32_t expected[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
55390840fbSDimitrij Mijoski static_assert(array_size(input) == 11, "");
56390840fbSDimitrij Mijoski static_assert(array_size(expected) == 5, "");
57390840fbSDimitrij Mijoski
58390840fbSDimitrij Mijoski ExternT in[array_size(input)];
59390840fbSDimitrij Mijoski InternT exp[array_size(expected)];
60390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
61390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
62390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(in) == 10);
63390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(exp) == 4);
64390840fbSDimitrij Mijoski test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 4}};
65390840fbSDimitrij Mijoski for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
66390840fbSDimitrij Mijoski test_offsets_ok t = *it;
67390840fbSDimitrij Mijoski InternT out[array_size(exp) - 1] = {};
68390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
69390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
70390840fbSDimitrij Mijoski mbstate_t state = {};
71390840fbSDimitrij Mijoski const ExternT* in_next = nullptr;
72390840fbSDimitrij Mijoski InternT* out_next = nullptr;
73390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
74390840fbSDimitrij Mijoski
75390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
76390840fbSDimitrij Mijoski assert(res == cvt.ok);
77390840fbSDimitrij Mijoski assert(in_next == in + t.in_size);
78390840fbSDimitrij Mijoski assert(out_next == out + t.out_size);
79390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
80390840fbSDimitrij Mijoski if (t.out_size < array_size(out))
81390840fbSDimitrij Mijoski assert(out[t.out_size] == 0);
82390840fbSDimitrij Mijoski
83390840fbSDimitrij Mijoski state = mbstate_t();
84390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, t.out_size);
85390840fbSDimitrij Mijoski assert(len >= 0);
86390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.in_size);
87390840fbSDimitrij Mijoski }
88390840fbSDimitrij Mijoski
89390840fbSDimitrij Mijoski for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
90390840fbSDimitrij Mijoski test_offsets_ok t = *it;
91390840fbSDimitrij Mijoski InternT out[array_size(exp)] = {};
92390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
93390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
94390840fbSDimitrij Mijoski mbstate_t state = {};
95390840fbSDimitrij Mijoski const ExternT* in_next = nullptr;
96390840fbSDimitrij Mijoski InternT* out_next = nullptr;
97390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
98390840fbSDimitrij Mijoski
99390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
100390840fbSDimitrij Mijoski assert(res == cvt.ok);
101390840fbSDimitrij Mijoski assert(in_next == in + t.in_size);
102390840fbSDimitrij Mijoski assert(out_next == out + t.out_size);
103390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
104390840fbSDimitrij Mijoski if (t.out_size < array_size(out))
105390840fbSDimitrij Mijoski assert(out[t.out_size] == 0);
106390840fbSDimitrij Mijoski
107390840fbSDimitrij Mijoski state = mbstate_t();
108390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, array_size(out));
109390840fbSDimitrij Mijoski assert(len >= 0);
110390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.in_size);
111390840fbSDimitrij Mijoski }
112390840fbSDimitrij Mijoski }
113390840fbSDimitrij Mijoski
114390840fbSDimitrij Mijoski template <class InternT, class ExternT>
utf8_to_utf32_in_partial(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)115390840fbSDimitrij Mijoski void utf8_to_utf32_in_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
116390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
117390840fbSDimitrij Mijoski const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
118390840fbSDimitrij Mijoski const char32_t expected[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
119390840fbSDimitrij Mijoski static_assert(array_size(input) == 11, "");
120390840fbSDimitrij Mijoski static_assert(array_size(expected) == 5, "");
121390840fbSDimitrij Mijoski
122390840fbSDimitrij Mijoski ExternT in[array_size(input)];
123390840fbSDimitrij Mijoski InternT exp[array_size(expected)];
124390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
125390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
126390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(in) == 10);
127390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(exp) == 4);
128390840fbSDimitrij Mijoski
129390840fbSDimitrij Mijoski test_offsets_partial offsets[] = {
130390840fbSDimitrij Mijoski {1, 0, 0, 0}, // no space for first CP
131390840fbSDimitrij Mijoski
132390840fbSDimitrij Mijoski {3, 1, 1, 1}, // no space for second CP
133390840fbSDimitrij Mijoski {2, 2, 1, 1}, // incomplete second CP
134390840fbSDimitrij Mijoski {2, 1, 1, 1}, // incomplete second CP, and no space for it
135390840fbSDimitrij Mijoski
136390840fbSDimitrij Mijoski {6, 2, 3, 2}, // no space for third CP
137390840fbSDimitrij Mijoski {4, 3, 3, 2}, // incomplete third CP
138390840fbSDimitrij Mijoski {5, 3, 3, 2}, // incomplete third CP
139390840fbSDimitrij Mijoski {4, 2, 3, 2}, // incomplete third CP, and no space for it
140390840fbSDimitrij Mijoski {5, 2, 3, 2}, // incomplete third CP, and no space for it
141390840fbSDimitrij Mijoski
142390840fbSDimitrij Mijoski {10, 3, 6, 3}, // no space for fourth CP
143390840fbSDimitrij Mijoski {7, 4, 6, 3}, // incomplete fourth CP
144390840fbSDimitrij Mijoski {8, 4, 6, 3}, // incomplete fourth CP
145390840fbSDimitrij Mijoski {9, 4, 6, 3}, // incomplete fourth CP
146390840fbSDimitrij Mijoski {7, 3, 6, 3}, // incomplete fourth CP, and no space for it
147390840fbSDimitrij Mijoski {8, 3, 6, 3}, // incomplete fourth CP, and no space for it
148390840fbSDimitrij Mijoski {9, 3, 6, 3}, // incomplete fourth CP, and no space for it
149390840fbSDimitrij Mijoski };
150390840fbSDimitrij Mijoski
151390840fbSDimitrij Mijoski for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
152390840fbSDimitrij Mijoski test_offsets_partial t = *it;
153390840fbSDimitrij Mijoski InternT out[array_size(exp) - 1] = {};
154390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
155390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
156390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
157390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
158390840fbSDimitrij Mijoski mbstate_t state = {};
159390840fbSDimitrij Mijoski const ExternT* in_next = nullptr;
160390840fbSDimitrij Mijoski InternT* out_next = nullptr;
161390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
162390840fbSDimitrij Mijoski
163390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
164390840fbSDimitrij Mijoski assert(res == cvt.partial);
165390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
166390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
167390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
168390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
169390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
170390840fbSDimitrij Mijoski
171390840fbSDimitrij Mijoski state = mbstate_t();
172390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, t.out_size);
173390840fbSDimitrij Mijoski assert(len >= 0);
174390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.expected_in_next);
175390840fbSDimitrij Mijoski }
176390840fbSDimitrij Mijoski }
177390840fbSDimitrij Mijoski
178390840fbSDimitrij Mijoski template <class InternT, class ExternT>
utf8_to_utf32_in_error(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)179390840fbSDimitrij Mijoski void utf8_to_utf32_in_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
180390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP, 4-byte CP
181390840fbSDimitrij Mijoski const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
182390840fbSDimitrij Mijoski const char32_t expected[] = {'b', 0x0448, 0xD700, 0x10AAAA, 0};
183390840fbSDimitrij Mijoski static_assert(array_size(input) == 11, "");
184390840fbSDimitrij Mijoski static_assert(array_size(expected) == 5, "");
185390840fbSDimitrij Mijoski
186390840fbSDimitrij Mijoski ExternT in[array_size(input)];
187390840fbSDimitrij Mijoski InternT exp[array_size(expected)];
188390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
189390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
190390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(in) == 10);
191390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(exp) == 4);
192390840fbSDimitrij Mijoski
193390840fbSDimitrij Mijoski // There are 5 classes of errors in UTF-8 decoding
194390840fbSDimitrij Mijoski // 1. Missing leading byte
195390840fbSDimitrij Mijoski // 2. Missing trailing byte
196390840fbSDimitrij Mijoski // 3. Surrogate CP
197390840fbSDimitrij Mijoski // 4. Overlong sequence
198390840fbSDimitrij Mijoski // 5. CP out of Unicode range
199390840fbSDimitrij Mijoski test_offsets_error<unsigned char> offsets[] = {
200390840fbSDimitrij Mijoski
201390840fbSDimitrij Mijoski // 1. Missing leading byte. We will replace the leading byte with
202390840fbSDimitrij Mijoski // non-leading byte, such as a byte that is always invalid or a trailing
203390840fbSDimitrij Mijoski // byte.
204390840fbSDimitrij Mijoski
205390840fbSDimitrij Mijoski // replace leading byte with invalid byte
206390840fbSDimitrij Mijoski {1, 4, 0, 0, 0xFF, 0},
207390840fbSDimitrij Mijoski {3, 4, 1, 1, 0xFF, 1},
208390840fbSDimitrij Mijoski {6, 4, 3, 2, 0xFF, 3},
209390840fbSDimitrij Mijoski {10, 4, 6, 3, 0xFF, 6},
210390840fbSDimitrij Mijoski
211390840fbSDimitrij Mijoski // replace leading byte with trailing byte
212390840fbSDimitrij Mijoski {1, 4, 0, 0, 0b10101010, 0},
213390840fbSDimitrij Mijoski {3, 4, 1, 1, 0b10101010, 1},
214390840fbSDimitrij Mijoski {6, 4, 3, 2, 0b10101010, 3},
215390840fbSDimitrij Mijoski {10, 4, 6, 3, 0b10101010, 6},
216390840fbSDimitrij Mijoski
217390840fbSDimitrij Mijoski // 2. Missing trailing byte. We will replace the trailing byte with
218390840fbSDimitrij Mijoski // non-trailing byte, such as a byte that is always invalid or a leading
219390840fbSDimitrij Mijoski // byte (simple ASCII byte in our case).
220390840fbSDimitrij Mijoski
221390840fbSDimitrij Mijoski // replace first trailing byte with ASCII byte
222390840fbSDimitrij Mijoski {3, 4, 1, 1, 'z', 2},
223390840fbSDimitrij Mijoski {6, 4, 3, 2, 'z', 4},
224390840fbSDimitrij Mijoski {10, 4, 6, 3, 'z', 7},
225390840fbSDimitrij Mijoski
226390840fbSDimitrij Mijoski // replace first trailing byte with invalid byte
227390840fbSDimitrij Mijoski {3, 4, 1, 1, 0xFF, 2},
228390840fbSDimitrij Mijoski {6, 4, 3, 2, 0xFF, 4},
229390840fbSDimitrij Mijoski {10, 4, 6, 3, 0xFF, 7},
230390840fbSDimitrij Mijoski
231390840fbSDimitrij Mijoski // replace second trailing byte with ASCII byte
232390840fbSDimitrij Mijoski {6, 4, 3, 2, 'z', 5},
233390840fbSDimitrij Mijoski {10, 4, 6, 3, 'z', 8},
234390840fbSDimitrij Mijoski
235390840fbSDimitrij Mijoski // replace second trailing byte with invalid byte
236390840fbSDimitrij Mijoski {6, 4, 3, 2, 0xFF, 5},
237390840fbSDimitrij Mijoski {10, 4, 6, 3, 0xFF, 8},
238390840fbSDimitrij Mijoski
239390840fbSDimitrij Mijoski // replace third trailing byte
240390840fbSDimitrij Mijoski {10, 4, 6, 3, 'z', 9},
241390840fbSDimitrij Mijoski {10, 4, 6, 3, 0xFF, 9},
242390840fbSDimitrij Mijoski
243390840fbSDimitrij Mijoski // 2.1 The following test-cases raise doubt whether error or partial should
244390840fbSDimitrij Mijoski // be returned. For example, we have 4-byte sequence with valid leading
245390840fbSDimitrij Mijoski // byte. If we hide the last byte we need to return partial. But, if the
246390840fbSDimitrij Mijoski // second or third byte, which are visible to the call to codecvt, are
247390840fbSDimitrij Mijoski // malformed then error should be returned.
248390840fbSDimitrij Mijoski
249390840fbSDimitrij Mijoski // replace first trailing byte with ASCII byte, also incomplete at end
250390840fbSDimitrij Mijoski {5, 4, 3, 2, 'z', 4},
251390840fbSDimitrij Mijoski {8, 4, 6, 3, 'z', 7},
252390840fbSDimitrij Mijoski {9, 4, 6, 3, 'z', 7},
253390840fbSDimitrij Mijoski
254390840fbSDimitrij Mijoski // replace first trailing byte with invalid byte, also incomplete at end
255390840fbSDimitrij Mijoski {5, 4, 3, 2, 0xFF, 4},
256390840fbSDimitrij Mijoski {8, 4, 6, 3, 0xFF, 7},
257390840fbSDimitrij Mijoski {9, 4, 6, 3, 0xFF, 7},
258390840fbSDimitrij Mijoski
259390840fbSDimitrij Mijoski // replace second trailing byte with ASCII byte, also incomplete at end
260390840fbSDimitrij Mijoski {9, 4, 6, 3, 'z', 8},
261390840fbSDimitrij Mijoski
262390840fbSDimitrij Mijoski // replace second trailing byte with invalid byte, also incomplete at end
263390840fbSDimitrij Mijoski {9, 4, 6, 3, 0xFF, 8},
264390840fbSDimitrij Mijoski
265390840fbSDimitrij Mijoski // 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
266390840fbSDimitrij Mijoski // CP U+D700
267390840fbSDimitrij Mijoski {6, 4, 3, 2, 0b10100000, 4}, // turn U+D700 into U+D800
268390840fbSDimitrij Mijoski {6, 4, 3, 2, 0b10101100, 4}, // turn U+D700 into U+DB00
269390840fbSDimitrij Mijoski {6, 4, 3, 2, 0b10110000, 4}, // turn U+D700 into U+DC00
270390840fbSDimitrij Mijoski {6, 4, 3, 2, 0b10111100, 4}, // turn U+D700 into U+DF00
271390840fbSDimitrij Mijoski
272390840fbSDimitrij Mijoski // 4. Overlong sequence. The CPs in the input are chosen such as modifying
273390840fbSDimitrij Mijoski // just the leading byte is enough to make them overlong, i.e. for the
274390840fbSDimitrij Mijoski // 3-byte and 4-byte CP the second byte (first trailing) has enough leading
275390840fbSDimitrij Mijoski // zeroes.
276390840fbSDimitrij Mijoski {3, 4, 1, 1, 0b11000000, 1}, // make the 2-byte CP overlong
277390840fbSDimitrij Mijoski {3, 4, 1, 1, 0b11000001, 1}, // make the 2-byte CP overlong
278390840fbSDimitrij Mijoski {6, 4, 3, 2, 0b11100000, 3}, // make the 3-byte CP overlong
279390840fbSDimitrij Mijoski {10, 4, 6, 3, 0b11110000, 6}, // make the 4-byte CP overlong
280390840fbSDimitrij Mijoski
281390840fbSDimitrij Mijoski // 5. CP above range
282390840fbSDimitrij Mijoski // turn U+10AAAA into U+14AAAA by changing its leading byte
283390840fbSDimitrij Mijoski {10, 4, 6, 3, 0b11110101, 6},
284390840fbSDimitrij Mijoski // turn U+10AAAA into U+11AAAA by changing its 2nd byte
285390840fbSDimitrij Mijoski {10, 4, 6, 3, 0b10011010, 7},
286390840fbSDimitrij Mijoski };
287390840fbSDimitrij Mijoski for (test_offsets_error<unsigned char>* it = begin(offsets); it != end(offsets); ++it) {
288390840fbSDimitrij Mijoski test_offsets_error<unsigned char> t = *it;
289390840fbSDimitrij Mijoski InternT out[array_size(exp) - 1] = {};
290390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
291390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
292390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
293390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
294390840fbSDimitrij Mijoski ExternT old_char = in[t.replace_pos];
295390840fbSDimitrij Mijoski in[t.replace_pos] = t.replace_char;
296390840fbSDimitrij Mijoski
297390840fbSDimitrij Mijoski mbstate_t state = {};
298390840fbSDimitrij Mijoski const ExternT* in_next = nullptr;
299390840fbSDimitrij Mijoski InternT* out_next = nullptr;
300390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
301390840fbSDimitrij Mijoski
302390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
303390840fbSDimitrij Mijoski assert(res == cvt.error);
304390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
305390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
306390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
307390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
308390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
309390840fbSDimitrij Mijoski
310390840fbSDimitrij Mijoski state = mbstate_t();
311390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, t.out_size);
312390840fbSDimitrij Mijoski assert(len >= 0);
313390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.expected_in_next);
314390840fbSDimitrij Mijoski
315390840fbSDimitrij Mijoski in[t.replace_pos] = old_char;
316390840fbSDimitrij Mijoski }
317390840fbSDimitrij Mijoski }
318390840fbSDimitrij Mijoski
// Runs all UTF-8 -> UTF-32 decoding checks (ok, partial and error results)
// against the given facet.
template <class InternT, class ExternT>
void utf8_to_utf32_in(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf32_in_ok(cvt);
  utf8_to_utf32_in_partial(cvt);
  utf8_to_utf32_in_error(cvt);
}
325390840fbSDimitrij Mijoski
326390840fbSDimitrij Mijoski template <class InternT, class ExternT>
utf32_to_utf8_out_ok(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)327390840fbSDimitrij Mijoski void utf32_to_utf8_out_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
328390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
329390840fbSDimitrij Mijoski const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
330390840fbSDimitrij Mijoski const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
331390840fbSDimitrij Mijoski static_assert(array_size(input) == 5, "");
332390840fbSDimitrij Mijoski static_assert(array_size(expected) == 11, "");
333390840fbSDimitrij Mijoski
334390840fbSDimitrij Mijoski InternT in[array_size(input)];
335390840fbSDimitrij Mijoski ExternT exp[array_size(expected)];
336390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
337390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
338390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(in) == 4);
339390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(exp) == 10);
340390840fbSDimitrij Mijoski
341390840fbSDimitrij Mijoski test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {4, 10}};
342390840fbSDimitrij Mijoski for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
343390840fbSDimitrij Mijoski test_offsets_ok t = *it;
344390840fbSDimitrij Mijoski ExternT out[array_size(exp) - 1] = {};
345390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
346390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
347390840fbSDimitrij Mijoski mbstate_t state = {};
348390840fbSDimitrij Mijoski const InternT* in_next = nullptr;
349390840fbSDimitrij Mijoski ExternT* out_next = nullptr;
350390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
351390840fbSDimitrij Mijoski
352390840fbSDimitrij Mijoski res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
353390840fbSDimitrij Mijoski assert(res == cvt.ok);
354390840fbSDimitrij Mijoski assert(in_next == in + t.in_size);
355390840fbSDimitrij Mijoski assert(out_next == out + t.out_size);
356390840fbSDimitrij Mijoski assert(char_traits<ExternT>::compare(out, exp, t.out_size) == 0);
357390840fbSDimitrij Mijoski if (t.out_size < array_size(out))
358390840fbSDimitrij Mijoski assert(out[t.out_size] == 0);
359390840fbSDimitrij Mijoski }
360390840fbSDimitrij Mijoski }
361390840fbSDimitrij Mijoski
362390840fbSDimitrij Mijoski template <class InternT, class ExternT>
utf32_to_utf8_out_partial(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)363390840fbSDimitrij Mijoski void utf32_to_utf8_out_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
364390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
365390840fbSDimitrij Mijoski const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
366390840fbSDimitrij Mijoski const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
367390840fbSDimitrij Mijoski static_assert(array_size(input) == 5, "");
368390840fbSDimitrij Mijoski static_assert(array_size(expected) == 11, "");
369390840fbSDimitrij Mijoski
370390840fbSDimitrij Mijoski InternT in[array_size(input)];
371390840fbSDimitrij Mijoski ExternT exp[array_size(expected)];
372390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
373390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
374390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(in) == 4);
375390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(exp) == 10);
376390840fbSDimitrij Mijoski
377390840fbSDimitrij Mijoski test_offsets_partial offsets[] = {
378390840fbSDimitrij Mijoski {1, 0, 0, 0}, // no space for first CP
379390840fbSDimitrij Mijoski
380390840fbSDimitrij Mijoski {2, 1, 1, 1}, // no space for second CP
381390840fbSDimitrij Mijoski {2, 2, 1, 1}, // no space for second CP
382390840fbSDimitrij Mijoski
383390840fbSDimitrij Mijoski {3, 3, 2, 3}, // no space for third CP
384390840fbSDimitrij Mijoski {3, 4, 2, 3}, // no space for third CP
385390840fbSDimitrij Mijoski {3, 5, 2, 3}, // no space for third CP
386390840fbSDimitrij Mijoski
387390840fbSDimitrij Mijoski {4, 6, 3, 6}, // no space for fourth CP
388390840fbSDimitrij Mijoski {4, 7, 3, 6}, // no space for fourth CP
389390840fbSDimitrij Mijoski {4, 8, 3, 6}, // no space for fourth CP
390390840fbSDimitrij Mijoski {4, 9, 3, 6}, // no space for fourth CP
391390840fbSDimitrij Mijoski };
392390840fbSDimitrij Mijoski for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
393390840fbSDimitrij Mijoski test_offsets_partial t = *it;
394390840fbSDimitrij Mijoski ExternT out[array_size(exp) - 1] = {};
395390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
396390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
397390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
398390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
399390840fbSDimitrij Mijoski mbstate_t state = {};
400390840fbSDimitrij Mijoski const InternT* in_next = nullptr;
401390840fbSDimitrij Mijoski ExternT* out_next = nullptr;
402390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
403390840fbSDimitrij Mijoski
404390840fbSDimitrij Mijoski res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
405390840fbSDimitrij Mijoski assert(res == cvt.partial);
406390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
407390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
408390840fbSDimitrij Mijoski assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
409390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
410390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
411390840fbSDimitrij Mijoski }
412390840fbSDimitrij Mijoski }
413390840fbSDimitrij Mijoski
414390840fbSDimitrij Mijoski template <class InternT, class ExternT>
utf32_to_utf8_out_error(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)415390840fbSDimitrij Mijoski void utf32_to_utf8_out_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
416390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
417390840fbSDimitrij Mijoski const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
418390840fbSDimitrij Mijoski const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
419390840fbSDimitrij Mijoski static_assert(array_size(input) == 5, "");
420390840fbSDimitrij Mijoski static_assert(array_size(expected) == 11, "");
421390840fbSDimitrij Mijoski
422390840fbSDimitrij Mijoski InternT in[array_size(input)];
423390840fbSDimitrij Mijoski ExternT exp[array_size(expected)];
424390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
425390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
426390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(in) == 4);
427390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(exp) == 10);
428390840fbSDimitrij Mijoski
429390840fbSDimitrij Mijoski test_offsets_error<InternT> offsets[] = {
430390840fbSDimitrij Mijoski
431390840fbSDimitrij Mijoski // Surrogate CP
432390840fbSDimitrij Mijoski {4, 10, 0, 0, 0xD800, 0},
433390840fbSDimitrij Mijoski {4, 10, 1, 1, 0xDBFF, 1},
434390840fbSDimitrij Mijoski {4, 10, 2, 3, 0xDC00, 2},
435390840fbSDimitrij Mijoski {4, 10, 3, 6, 0xDFFF, 3},
436390840fbSDimitrij Mijoski
437390840fbSDimitrij Mijoski // CP out of range
438390840fbSDimitrij Mijoski {4, 10, 0, 0, 0x00110000, 0},
439390840fbSDimitrij Mijoski {4, 10, 1, 1, 0x00110000, 1},
440390840fbSDimitrij Mijoski {4, 10, 2, 3, 0x00110000, 2},
441390840fbSDimitrij Mijoski {4, 10, 3, 6, 0x00110000, 3}};
442390840fbSDimitrij Mijoski
443390840fbSDimitrij Mijoski for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
444390840fbSDimitrij Mijoski test_offsets_error<InternT> t = *it;
445390840fbSDimitrij Mijoski ExternT out[array_size(exp) - 1] = {};
446390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
447390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
448390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
449390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
450390840fbSDimitrij Mijoski InternT old_char = in[t.replace_pos];
451390840fbSDimitrij Mijoski in[t.replace_pos] = t.replace_char;
452390840fbSDimitrij Mijoski
453390840fbSDimitrij Mijoski mbstate_t state = {};
454390840fbSDimitrij Mijoski const InternT* in_next = nullptr;
455390840fbSDimitrij Mijoski ExternT* out_next = nullptr;
456390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
457390840fbSDimitrij Mijoski
458390840fbSDimitrij Mijoski res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
459390840fbSDimitrij Mijoski assert(res == cvt.error);
460390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
461390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
462390840fbSDimitrij Mijoski assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
463390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
464390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
465390840fbSDimitrij Mijoski
466390840fbSDimitrij Mijoski in[t.replace_pos] = old_char;
467390840fbSDimitrij Mijoski }
468390840fbSDimitrij Mijoski }
469390840fbSDimitrij Mijoski
// Runs all UTF-32 -> UTF-8 encoding checks (ok, partial and error results)
// against the given facet.
template <class InternT, class ExternT>
void utf32_to_utf8_out(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf32_to_utf8_out_ok(cvt);
  utf32_to_utf8_out_partial(cvt);
  utf32_to_utf8_out_error(cvt);
}
476390840fbSDimitrij Mijoski
// Entry point for testing a facet that converts between UTF-8 (external) and
// UTF-32 (internal): exercises decoding via in()/length() and encoding via
// out().
template <class InternT, class ExternT>
void test_utf8_utf32_cvt(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf32_in(cvt);
  utf32_to_utf8_out(cvt);
}
482390840fbSDimitrij Mijoski
483390840fbSDimitrij Mijoski template <class InternT, class ExternT>
utf8_to_utf16_in_ok(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)484390840fbSDimitrij Mijoski void utf8_to_utf16_in_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
485390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
486390840fbSDimitrij Mijoski const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
487390840fbSDimitrij Mijoski const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
488390840fbSDimitrij Mijoski static_assert(array_size(input) == 11, "");
489390840fbSDimitrij Mijoski static_assert(array_size(expected) == 6, "");
490390840fbSDimitrij Mijoski
491390840fbSDimitrij Mijoski ExternT in[array_size(input)];
492390840fbSDimitrij Mijoski InternT exp[array_size(expected)];
493390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
494390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
495390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(in) == 10);
496390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(exp) == 5);
497390840fbSDimitrij Mijoski
498390840fbSDimitrij Mijoski test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 5}};
499390840fbSDimitrij Mijoski for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
500390840fbSDimitrij Mijoski test_offsets_ok t = *it;
501390840fbSDimitrij Mijoski InternT out[array_size(exp) - 1] = {};
502390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
503390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
504390840fbSDimitrij Mijoski mbstate_t state = {};
505390840fbSDimitrij Mijoski const ExternT* in_next = nullptr;
506390840fbSDimitrij Mijoski InternT* out_next = nullptr;
507390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
508390840fbSDimitrij Mijoski
509390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
510390840fbSDimitrij Mijoski assert(res == cvt.ok);
511390840fbSDimitrij Mijoski assert(in_next == in + t.in_size);
512390840fbSDimitrij Mijoski assert(out_next == out + t.out_size);
513390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
514390840fbSDimitrij Mijoski if (t.out_size < array_size(out))
515390840fbSDimitrij Mijoski assert(out[t.out_size] == 0);
516390840fbSDimitrij Mijoski
517390840fbSDimitrij Mijoski state = mbstate_t();
518390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, t.out_size);
519390840fbSDimitrij Mijoski assert(len >= 0);
520390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.in_size);
521390840fbSDimitrij Mijoski }
522390840fbSDimitrij Mijoski
523390840fbSDimitrij Mijoski for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
524390840fbSDimitrij Mijoski test_offsets_ok t = *it;
525390840fbSDimitrij Mijoski InternT out[array_size(exp)] = {};
526390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
527390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
528390840fbSDimitrij Mijoski mbstate_t state = {};
529390840fbSDimitrij Mijoski const ExternT* in_next = nullptr;
530390840fbSDimitrij Mijoski InternT* out_next = nullptr;
531390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
532390840fbSDimitrij Mijoski
533390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
534390840fbSDimitrij Mijoski assert(res == cvt.ok);
535390840fbSDimitrij Mijoski assert(in_next == in + t.in_size);
536390840fbSDimitrij Mijoski assert(out_next == out + t.out_size);
537390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
538390840fbSDimitrij Mijoski if (t.out_size < array_size(out))
539390840fbSDimitrij Mijoski assert(out[t.out_size] == 0);
540390840fbSDimitrij Mijoski
541390840fbSDimitrij Mijoski state = mbstate_t();
542390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, array_size(out));
543390840fbSDimitrij Mijoski assert(len >= 0);
544390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.in_size);
545390840fbSDimitrij Mijoski }
546390840fbSDimitrij Mijoski }
547390840fbSDimitrij Mijoski
548390840fbSDimitrij Mijoski template <class InternT, class ExternT>
utf8_to_utf16_in_partial(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)549390840fbSDimitrij Mijoski void utf8_to_utf16_in_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
550390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
551390840fbSDimitrij Mijoski const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
552390840fbSDimitrij Mijoski const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
553390840fbSDimitrij Mijoski static_assert(array_size(input) == 11, "");
554390840fbSDimitrij Mijoski static_assert(array_size(expected) == 6, "");
555390840fbSDimitrij Mijoski
556390840fbSDimitrij Mijoski ExternT in[array_size(input)];
557390840fbSDimitrij Mijoski InternT exp[array_size(expected)];
558390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
559390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
560390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(in) == 10);
561390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(exp) == 5);
562390840fbSDimitrij Mijoski
563390840fbSDimitrij Mijoski test_offsets_partial offsets[] = {
564390840fbSDimitrij Mijoski {1, 0, 0, 0}, // no space for first CP
565390840fbSDimitrij Mijoski
566390840fbSDimitrij Mijoski {3, 1, 1, 1}, // no space for second CP
567390840fbSDimitrij Mijoski {2, 2, 1, 1}, // incomplete second CP
568390840fbSDimitrij Mijoski {2, 1, 1, 1}, // incomplete second CP, and no space for it
569390840fbSDimitrij Mijoski
570390840fbSDimitrij Mijoski {6, 2, 3, 2}, // no space for third CP
571390840fbSDimitrij Mijoski {4, 3, 3, 2}, // incomplete third CP
572390840fbSDimitrij Mijoski {5, 3, 3, 2}, // incomplete third CP
573390840fbSDimitrij Mijoski {4, 2, 3, 2}, // incomplete third CP, and no space for it
574390840fbSDimitrij Mijoski {5, 2, 3, 2}, // incomplete third CP, and no space for it
575390840fbSDimitrij Mijoski
576390840fbSDimitrij Mijoski {10, 3, 6, 3}, // no space for fourth CP
577390840fbSDimitrij Mijoski {10, 4, 6, 3}, // no space for fourth CP
578390840fbSDimitrij Mijoski {7, 5, 6, 3}, // incomplete fourth CP
579390840fbSDimitrij Mijoski {8, 5, 6, 3}, // incomplete fourth CP
580390840fbSDimitrij Mijoski {9, 5, 6, 3}, // incomplete fourth CP
581390840fbSDimitrij Mijoski {7, 3, 6, 3}, // incomplete fourth CP, and no space for it
582390840fbSDimitrij Mijoski {8, 3, 6, 3}, // incomplete fourth CP, and no space for it
583390840fbSDimitrij Mijoski {9, 3, 6, 3}, // incomplete fourth CP, and no space for it
584390840fbSDimitrij Mijoski {7, 4, 6, 3}, // incomplete fourth CP, and no space for it
585390840fbSDimitrij Mijoski {8, 4, 6, 3}, // incomplete fourth CP, and no space for it
586390840fbSDimitrij Mijoski {9, 4, 6, 3}, // incomplete fourth CP, and no space for it
587390840fbSDimitrij Mijoski
588390840fbSDimitrij Mijoski };
589390840fbSDimitrij Mijoski
590390840fbSDimitrij Mijoski for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
591390840fbSDimitrij Mijoski test_offsets_partial t = *it;
592390840fbSDimitrij Mijoski InternT out[array_size(exp) - 1] = {};
593390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
594390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
595390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
596390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
597390840fbSDimitrij Mijoski mbstate_t state = {};
598390840fbSDimitrij Mijoski const ExternT* in_next = nullptr;
599390840fbSDimitrij Mijoski InternT* out_next = nullptr;
600390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
601390840fbSDimitrij Mijoski
602390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
603390840fbSDimitrij Mijoski assert(res == cvt.partial);
604390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
605390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
606390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
607390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
608390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
609390840fbSDimitrij Mijoski
610390840fbSDimitrij Mijoski state = mbstate_t();
611390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, t.out_size);
612390840fbSDimitrij Mijoski assert(len >= 0);
613390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.expected_in_next);
614390840fbSDimitrij Mijoski }
615390840fbSDimitrij Mijoski }
616390840fbSDimitrij Mijoski
617390840fbSDimitrij Mijoski template <class InternT, class ExternT>
utf8_to_utf16_in_error(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)618390840fbSDimitrij Mijoski void utf8_to_utf16_in_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
619390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP, 4-byte CP
620390840fbSDimitrij Mijoski const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
621390840fbSDimitrij Mijoski const char16_t expected[] = {'b', 0x0448, 0xD700, 0xDBEA, 0xDEAA, 0};
622390840fbSDimitrij Mijoski static_assert(array_size(input) == 11, "");
623390840fbSDimitrij Mijoski static_assert(array_size(expected) == 6, "");
624390840fbSDimitrij Mijoski
625390840fbSDimitrij Mijoski ExternT in[array_size(input)];
626390840fbSDimitrij Mijoski InternT exp[array_size(expected)];
627390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
628390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
629390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(in) == 10);
630390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(exp) == 5);
631390840fbSDimitrij Mijoski
632390840fbSDimitrij Mijoski // There are 5 classes of errors in UTF-8 decoding
633390840fbSDimitrij Mijoski // 1. Missing leading byte
634390840fbSDimitrij Mijoski // 2. Missing trailing byte
635390840fbSDimitrij Mijoski // 3. Surrogate CP
636390840fbSDimitrij Mijoski // 4. Overlong sequence
637390840fbSDimitrij Mijoski // 5. CP out of Unicode range
638390840fbSDimitrij Mijoski test_offsets_error<unsigned char> offsets[] = {
639390840fbSDimitrij Mijoski
640390840fbSDimitrij Mijoski // 1. Missing leading byte. We will replace the leading byte with
641390840fbSDimitrij Mijoski // non-leading byte, such as a byte that is always invalid or a trailing
642390840fbSDimitrij Mijoski // byte.
643390840fbSDimitrij Mijoski
644390840fbSDimitrij Mijoski // replace leading byte with invalid byte
645390840fbSDimitrij Mijoski {1, 5, 0, 0, 0xFF, 0},
646390840fbSDimitrij Mijoski {3, 5, 1, 1, 0xFF, 1},
647390840fbSDimitrij Mijoski {6, 5, 3, 2, 0xFF, 3},
648390840fbSDimitrij Mijoski {10, 5, 6, 3, 0xFF, 6},
649390840fbSDimitrij Mijoski
650390840fbSDimitrij Mijoski // replace leading byte with trailing byte
651390840fbSDimitrij Mijoski {1, 5, 0, 0, 0b10101010, 0},
652390840fbSDimitrij Mijoski {3, 5, 1, 1, 0b10101010, 1},
653390840fbSDimitrij Mijoski {6, 5, 3, 2, 0b10101010, 3},
654390840fbSDimitrij Mijoski {10, 5, 6, 3, 0b10101010, 6},
655390840fbSDimitrij Mijoski
656390840fbSDimitrij Mijoski // 2. Missing trailing byte. We will replace the trailing byte with
657390840fbSDimitrij Mijoski // non-trailing byte, such as a byte that is always invalid or a leading
658390840fbSDimitrij Mijoski // byte (simple ASCII byte in our case).
659390840fbSDimitrij Mijoski
660390840fbSDimitrij Mijoski // replace first trailing byte with ASCII byte
661390840fbSDimitrij Mijoski {3, 5, 1, 1, 'z', 2},
662390840fbSDimitrij Mijoski {6, 5, 3, 2, 'z', 4},
663390840fbSDimitrij Mijoski {10, 5, 6, 3, 'z', 7},
664390840fbSDimitrij Mijoski
665390840fbSDimitrij Mijoski // replace first trailing byte with invalid byte
666390840fbSDimitrij Mijoski {3, 5, 1, 1, 0xFF, 2},
667390840fbSDimitrij Mijoski {6, 5, 3, 2, 0xFF, 4},
668390840fbSDimitrij Mijoski {10, 5, 6, 3, 0xFF, 7},
669390840fbSDimitrij Mijoski
670390840fbSDimitrij Mijoski // replace second trailing byte with ASCII byte
671390840fbSDimitrij Mijoski {6, 5, 3, 2, 'z', 5},
672390840fbSDimitrij Mijoski {10, 5, 6, 3, 'z', 8},
673390840fbSDimitrij Mijoski
674390840fbSDimitrij Mijoski // replace second trailing byte with invalid byte
675390840fbSDimitrij Mijoski {6, 5, 3, 2, 0xFF, 5},
676390840fbSDimitrij Mijoski {10, 5, 6, 3, 0xFF, 8},
677390840fbSDimitrij Mijoski
678390840fbSDimitrij Mijoski // replace third trailing byte
679390840fbSDimitrij Mijoski {10, 5, 6, 3, 'z', 9},
680390840fbSDimitrij Mijoski {10, 5, 6, 3, 0xFF, 9},
681390840fbSDimitrij Mijoski
682390840fbSDimitrij Mijoski // 2.1 The following test-cases raise doubt whether error or partial should
683390840fbSDimitrij Mijoski // be returned. For example, we have 4-byte sequence with valid leading
684390840fbSDimitrij Mijoski // byte. If we hide the last byte we need to return partial. But, if the
685390840fbSDimitrij Mijoski // second or third byte, which are visible to the call to codecvt, are
686390840fbSDimitrij Mijoski // malformed then error should be returned.
687390840fbSDimitrij Mijoski
688390840fbSDimitrij Mijoski // replace first trailing byte with ASCII byte, also incomplete at end
689390840fbSDimitrij Mijoski {5, 5, 3, 2, 'z', 4},
690390840fbSDimitrij Mijoski {8, 5, 6, 3, 'z', 7},
691390840fbSDimitrij Mijoski {9, 5, 6, 3, 'z', 7},
692390840fbSDimitrij Mijoski
693390840fbSDimitrij Mijoski // replace first trailing byte with invalid byte, also incomplete at end
694390840fbSDimitrij Mijoski {5, 5, 3, 2, 0xFF, 4},
695390840fbSDimitrij Mijoski {8, 5, 6, 3, 0xFF, 7},
696390840fbSDimitrij Mijoski {9, 5, 6, 3, 0xFF, 7},
697390840fbSDimitrij Mijoski
698390840fbSDimitrij Mijoski // replace second trailing byte with ASCII byte, also incomplete at end
699390840fbSDimitrij Mijoski {9, 5, 6, 3, 'z', 8},
700390840fbSDimitrij Mijoski
701390840fbSDimitrij Mijoski // replace second trailing byte with invalid byte, also incomplete at end
702390840fbSDimitrij Mijoski {9, 5, 6, 3, 0xFF, 8},
703390840fbSDimitrij Mijoski
704390840fbSDimitrij Mijoski // 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
705390840fbSDimitrij Mijoski // CP U+D700
706390840fbSDimitrij Mijoski {6, 5, 3, 2, 0b10100000, 4}, // turn U+D700 into U+D800
707390840fbSDimitrij Mijoski {6, 5, 3, 2, 0b10101100, 4}, // turn U+D700 into U+DB00
708390840fbSDimitrij Mijoski {6, 5, 3, 2, 0b10110000, 4}, // turn U+D700 into U+DC00
709390840fbSDimitrij Mijoski {6, 5, 3, 2, 0b10111100, 4}, // turn U+D700 into U+DF00
710390840fbSDimitrij Mijoski
711390840fbSDimitrij Mijoski // 4. Overlong sequence. The CPs in the input are chosen such as modifying
712390840fbSDimitrij Mijoski // just the leading byte is enough to make them overlong, i.e. for the
713390840fbSDimitrij Mijoski // 3-byte and 4-byte CP the second byte (first trailing) has enough leading
714390840fbSDimitrij Mijoski // zeroes.
715390840fbSDimitrij Mijoski {3, 5, 1, 1, 0b11000000, 1}, // make the 2-byte CP overlong
716390840fbSDimitrij Mijoski {3, 5, 1, 1, 0b11000001, 1}, // make the 2-byte CP overlong
717390840fbSDimitrij Mijoski {6, 5, 3, 2, 0b11100000, 3}, // make the 3-byte CP overlong
718390840fbSDimitrij Mijoski {10, 5, 6, 3, 0b11110000, 6}, // make the 4-byte CP overlong
719390840fbSDimitrij Mijoski
720390840fbSDimitrij Mijoski // 5. CP above range
721390840fbSDimitrij Mijoski // turn U+10AAAA into U+14AAAA by changing its leading byte
722390840fbSDimitrij Mijoski {10, 5, 6, 3, 0b11110101, 6},
723390840fbSDimitrij Mijoski // turn U+10AAAA into U+11AAAA by changing its 2nd byte
724390840fbSDimitrij Mijoski {10, 5, 6, 3, 0b10011010, 7},
725390840fbSDimitrij Mijoski };
726390840fbSDimitrij Mijoski for (test_offsets_error<unsigned char>* it = begin(offsets); it != end(offsets); ++it) {
727390840fbSDimitrij Mijoski test_offsets_error<unsigned char> t = *it;
728390840fbSDimitrij Mijoski InternT out[array_size(exp) - 1] = {};
729390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
730390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
731390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
732390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
733390840fbSDimitrij Mijoski ExternT old_char = in[t.replace_pos];
734390840fbSDimitrij Mijoski in[t.replace_pos] = t.replace_char;
735390840fbSDimitrij Mijoski
736390840fbSDimitrij Mijoski mbstate_t state = {};
737390840fbSDimitrij Mijoski const ExternT* in_next = nullptr;
738390840fbSDimitrij Mijoski InternT* out_next = nullptr;
739390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
740390840fbSDimitrij Mijoski
741390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
742390840fbSDimitrij Mijoski assert(res == cvt.error);
743390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
744390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
745390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
746390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
747390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
748390840fbSDimitrij Mijoski
749390840fbSDimitrij Mijoski state = mbstate_t();
750390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, t.out_size);
751390840fbSDimitrij Mijoski assert(len >= 0);
752390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.expected_in_next);
753390840fbSDimitrij Mijoski
754390840fbSDimitrij Mijoski in[t.replace_pos] = old_char;
755390840fbSDimitrij Mijoski }
756390840fbSDimitrij Mijoski }
757390840fbSDimitrij Mijoski
// Runs all three UTF-8 -> UTF-16 `in` test groups (ok, partial, error)
// against the facet `cvt`.
template <class InternT, class ExternT>
void utf8_to_utf16_in(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf16_in_ok(cvt);
  utf8_to_utf16_in_partial(cvt);
  utf8_to_utf16_in_error(cvt);
}
764390840fbSDimitrij Mijoski
765390840fbSDimitrij Mijoski template <class InternT, class ExternT>
utf16_to_utf8_out_ok(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)766390840fbSDimitrij Mijoski void utf16_to_utf8_out_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
767390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
768390840fbSDimitrij Mijoski const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
769390840fbSDimitrij Mijoski const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
770390840fbSDimitrij Mijoski static_assert(array_size(input) == 6, "");
771390840fbSDimitrij Mijoski static_assert(array_size(expected) == 11, "");
772390840fbSDimitrij Mijoski
773390840fbSDimitrij Mijoski InternT in[array_size(input)];
774390840fbSDimitrij Mijoski ExternT exp[array_size(expected)];
775390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
776390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
777390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(in) == 5);
778390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(exp) == 10);
779390840fbSDimitrij Mijoski
780390840fbSDimitrij Mijoski test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {5, 10}};
781390840fbSDimitrij Mijoski for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
782390840fbSDimitrij Mijoski test_offsets_ok t = *it;
783390840fbSDimitrij Mijoski ExternT out[array_size(exp) - 1] = {};
784390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
785390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
786390840fbSDimitrij Mijoski mbstate_t state = {};
787390840fbSDimitrij Mijoski const InternT* in_next = nullptr;
788390840fbSDimitrij Mijoski ExternT* out_next = nullptr;
789390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
790390840fbSDimitrij Mijoski
791390840fbSDimitrij Mijoski res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
792390840fbSDimitrij Mijoski assert(res == cvt.ok);
793390840fbSDimitrij Mijoski assert(in_next == in + t.in_size);
794390840fbSDimitrij Mijoski assert(out_next == out + t.out_size);
795390840fbSDimitrij Mijoski assert(char_traits<ExternT>::compare(out, exp, t.out_size) == 0);
796390840fbSDimitrij Mijoski if (t.out_size < array_size(out))
797390840fbSDimitrij Mijoski assert(out[t.out_size] == 0);
798390840fbSDimitrij Mijoski }
799390840fbSDimitrij Mijoski }
800390840fbSDimitrij Mijoski
801390840fbSDimitrij Mijoski template <class InternT, class ExternT>
utf16_to_utf8_out_partial(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)802390840fbSDimitrij Mijoski void utf16_to_utf8_out_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
803390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
804390840fbSDimitrij Mijoski const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
805390840fbSDimitrij Mijoski const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
806390840fbSDimitrij Mijoski static_assert(array_size(input) == 6, "");
807390840fbSDimitrij Mijoski static_assert(array_size(expected) == 11, "");
808390840fbSDimitrij Mijoski
809390840fbSDimitrij Mijoski InternT in[array_size(input)];
810390840fbSDimitrij Mijoski ExternT exp[array_size(expected)];
811390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
812390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
813390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(in) == 5);
814390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(exp) == 10);
815390840fbSDimitrij Mijoski
816390840fbSDimitrij Mijoski test_offsets_partial offsets[] = {
817390840fbSDimitrij Mijoski {1, 0, 0, 0}, // no space for first CP
818390840fbSDimitrij Mijoski
819390840fbSDimitrij Mijoski {2, 1, 1, 1}, // no space for second CP
820390840fbSDimitrij Mijoski {2, 2, 1, 1}, // no space for second CP
821390840fbSDimitrij Mijoski
822390840fbSDimitrij Mijoski {3, 3, 2, 3}, // no space for third CP
823390840fbSDimitrij Mijoski {3, 4, 2, 3}, // no space for third CP
824390840fbSDimitrij Mijoski {3, 5, 2, 3}, // no space for third CP
825390840fbSDimitrij Mijoski
826390840fbSDimitrij Mijoski {5, 6, 3, 6}, // no space for fourth CP
827390840fbSDimitrij Mijoski {5, 7, 3, 6}, // no space for fourth CP
828390840fbSDimitrij Mijoski {5, 8, 3, 6}, // no space for fourth CP
829390840fbSDimitrij Mijoski {5, 9, 3, 6}, // no space for fourth CP
830390840fbSDimitrij Mijoski
831390840fbSDimitrij Mijoski {4, 10, 3, 6}, // incomplete fourth CP
832390840fbSDimitrij Mijoski
833390840fbSDimitrij Mijoski {4, 6, 3, 6}, // incomplete fourth CP, and no space for it
834390840fbSDimitrij Mijoski {4, 7, 3, 6}, // incomplete fourth CP, and no space for it
835390840fbSDimitrij Mijoski {4, 8, 3, 6}, // incomplete fourth CP, and no space for it
836390840fbSDimitrij Mijoski {4, 9, 3, 6}, // incomplete fourth CP, and no space for it
837390840fbSDimitrij Mijoski };
838390840fbSDimitrij Mijoski for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
839390840fbSDimitrij Mijoski test_offsets_partial t = *it;
840390840fbSDimitrij Mijoski ExternT out[array_size(exp) - 1] = {};
841390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
842390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
843390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
844390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
845390840fbSDimitrij Mijoski mbstate_t state = {};
846390840fbSDimitrij Mijoski const InternT* in_next = nullptr;
847390840fbSDimitrij Mijoski ExternT* out_next = nullptr;
848390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
849390840fbSDimitrij Mijoski
850390840fbSDimitrij Mijoski res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
851390840fbSDimitrij Mijoski assert(res == cvt.partial);
852390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
853390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
854390840fbSDimitrij Mijoski assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
855390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
856390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
857390840fbSDimitrij Mijoski }
858390840fbSDimitrij Mijoski }
859390840fbSDimitrij Mijoski
860390840fbSDimitrij Mijoski template <class InternT, class ExternT>
utf16_to_utf8_out_error(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)861390840fbSDimitrij Mijoski void utf16_to_utf8_out_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
862390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
863390840fbSDimitrij Mijoski const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
864390840fbSDimitrij Mijoski const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
865390840fbSDimitrij Mijoski static_assert(array_size(input) == 6, "");
866390840fbSDimitrij Mijoski static_assert(array_size(expected) == 11, "");
867390840fbSDimitrij Mijoski
868390840fbSDimitrij Mijoski InternT in[array_size(input)];
869390840fbSDimitrij Mijoski ExternT exp[array_size(expected)];
870390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
871390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
872390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(in) == 5);
873390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(exp) == 10);
874390840fbSDimitrij Mijoski
875390840fbSDimitrij Mijoski // The only possible error in UTF-16 is unpaired surrogate code units.
876390840fbSDimitrij Mijoski // So we replace valid code points (scalar values) with lone surrogate CU.
877390840fbSDimitrij Mijoski test_offsets_error<InternT> offsets[] = {
878390840fbSDimitrij Mijoski {5, 10, 0, 0, 0xD800, 0},
879390840fbSDimitrij Mijoski {5, 10, 0, 0, 0xDBFF, 0},
880390840fbSDimitrij Mijoski {5, 10, 0, 0, 0xDC00, 0},
881390840fbSDimitrij Mijoski {5, 10, 0, 0, 0xDFFF, 0},
882390840fbSDimitrij Mijoski
883390840fbSDimitrij Mijoski {5, 10, 1, 1, 0xD800, 1},
884390840fbSDimitrij Mijoski {5, 10, 1, 1, 0xDBFF, 1},
885390840fbSDimitrij Mijoski {5, 10, 1, 1, 0xDC00, 1},
886390840fbSDimitrij Mijoski {5, 10, 1, 1, 0xDFFF, 1},
887390840fbSDimitrij Mijoski
888390840fbSDimitrij Mijoski {5, 10, 2, 3, 0xD800, 2},
889390840fbSDimitrij Mijoski {5, 10, 2, 3, 0xDBFF, 2},
890390840fbSDimitrij Mijoski {5, 10, 2, 3, 0xDC00, 2},
891390840fbSDimitrij Mijoski {5, 10, 2, 3, 0xDFFF, 2},
892390840fbSDimitrij Mijoski
893390840fbSDimitrij Mijoski // make the leading surrogate a trailing one
894390840fbSDimitrij Mijoski {5, 10, 3, 6, 0xDC00, 3},
895390840fbSDimitrij Mijoski {5, 10, 3, 6, 0xDFFF, 3},
896390840fbSDimitrij Mijoski
897390840fbSDimitrij Mijoski // make the trailing surrogate a leading one
898390840fbSDimitrij Mijoski {5, 10, 3, 6, 0xD800, 4},
899390840fbSDimitrij Mijoski {5, 10, 3, 6, 0xDBFF, 4},
900390840fbSDimitrij Mijoski
901390840fbSDimitrij Mijoski // make the trailing surrogate a BMP char
902390840fbSDimitrij Mijoski {5, 10, 3, 6, 'z', 4},
903390840fbSDimitrij Mijoski };
904390840fbSDimitrij Mijoski
905390840fbSDimitrij Mijoski for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
906390840fbSDimitrij Mijoski test_offsets_error<InternT> t = *it;
907390840fbSDimitrij Mijoski ExternT out[array_size(exp) - 1] = {};
908390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
909390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
910390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
911390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
912390840fbSDimitrij Mijoski InternT old_char = in[t.replace_pos];
913390840fbSDimitrij Mijoski in[t.replace_pos] = t.replace_char;
914390840fbSDimitrij Mijoski
915390840fbSDimitrij Mijoski mbstate_t state = {};
916390840fbSDimitrij Mijoski const InternT* in_next = nullptr;
917390840fbSDimitrij Mijoski ExternT* out_next = nullptr;
918390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
919390840fbSDimitrij Mijoski
920390840fbSDimitrij Mijoski res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
921390840fbSDimitrij Mijoski assert(res == cvt.error);
922390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
923390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
924390840fbSDimitrij Mijoski assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
925390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
926390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
927390840fbSDimitrij Mijoski
928390840fbSDimitrij Mijoski in[t.replace_pos] = old_char;
929390840fbSDimitrij Mijoski }
930390840fbSDimitrij Mijoski }
931390840fbSDimitrij Mijoski
// Runs all three UTF-16 -> UTF-8 `out` test groups (ok, partial, error)
// against the facet `cvt`.
template <class InternT, class ExternT>
void utf16_to_utf8_out(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf16_to_utf8_out_ok(cvt);
  utf16_to_utf8_out_partial(cvt);
  utf16_to_utf8_out_error(cvt);
}
938390840fbSDimitrij Mijoski
// Entry point for a UTF-8 <-> UTF-16 facet: exercises the `in` direction
// (utf8_to_utf16_in) and then the `out` direction (utf16_to_utf8_out).
template <class InternT, class ExternT>
void test_utf8_utf16_cvt(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf16_in(cvt);
  utf16_to_utf8_out(cvt);
}
944390840fbSDimitrij Mijoski
945390840fbSDimitrij Mijoski template <class InternT, class ExternT>
utf8_to_ucs2_in_ok(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)946390840fbSDimitrij Mijoski void utf8_to_ucs2_in_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
947390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
948390840fbSDimitrij Mijoski const unsigned char input[] = "b\u0448\uAAAA";
949390840fbSDimitrij Mijoski const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
950390840fbSDimitrij Mijoski static_assert(array_size(input) == 7, "");
951390840fbSDimitrij Mijoski static_assert(array_size(expected) == 4, "");
952390840fbSDimitrij Mijoski
953390840fbSDimitrij Mijoski ExternT in[array_size(input)];
954390840fbSDimitrij Mijoski InternT exp[array_size(expected)];
955390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
956390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
957390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(in) == 6);
958390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(exp) == 3);
959390840fbSDimitrij Mijoski
960390840fbSDimitrij Mijoski test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}};
961390840fbSDimitrij Mijoski for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
962390840fbSDimitrij Mijoski test_offsets_ok t = *it;
963390840fbSDimitrij Mijoski InternT out[array_size(exp) - 1] = {};
964390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
965390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
966390840fbSDimitrij Mijoski mbstate_t state = {};
967390840fbSDimitrij Mijoski const ExternT* in_next = nullptr;
968390840fbSDimitrij Mijoski InternT* out_next = nullptr;
969390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
970390840fbSDimitrij Mijoski
971390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
972390840fbSDimitrij Mijoski assert(res == cvt.ok);
973390840fbSDimitrij Mijoski assert(in_next == in + t.in_size);
974390840fbSDimitrij Mijoski assert(out_next == out + t.out_size);
975390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
976390840fbSDimitrij Mijoski if (t.out_size < array_size(out))
977390840fbSDimitrij Mijoski assert(out[t.out_size] == 0);
978390840fbSDimitrij Mijoski
979390840fbSDimitrij Mijoski state = mbstate_t();
980390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, t.out_size);
981390840fbSDimitrij Mijoski assert(len >= 0);
982390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.in_size);
983390840fbSDimitrij Mijoski }
984390840fbSDimitrij Mijoski
985390840fbSDimitrij Mijoski for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
986390840fbSDimitrij Mijoski test_offsets_ok t = *it;
987390840fbSDimitrij Mijoski InternT out[array_size(exp)] = {};
988390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
989390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
990390840fbSDimitrij Mijoski mbstate_t state = {};
991390840fbSDimitrij Mijoski const ExternT* in_next = nullptr;
992390840fbSDimitrij Mijoski InternT* out_next = nullptr;
993390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
994390840fbSDimitrij Mijoski
995390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
996390840fbSDimitrij Mijoski assert(res == cvt.ok);
997390840fbSDimitrij Mijoski assert(in_next == in + t.in_size);
998390840fbSDimitrij Mijoski assert(out_next == out + t.out_size);
999390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
1000390840fbSDimitrij Mijoski if (t.out_size < array_size(out))
1001390840fbSDimitrij Mijoski assert(out[t.out_size] == 0);
1002390840fbSDimitrij Mijoski
1003390840fbSDimitrij Mijoski state = mbstate_t();
1004390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, array_size(out));
1005390840fbSDimitrij Mijoski assert(len >= 0);
1006390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.in_size);
1007390840fbSDimitrij Mijoski }
1008390840fbSDimitrij Mijoski }
1009390840fbSDimitrij Mijoski
1010390840fbSDimitrij Mijoski template <class InternT, class ExternT>
utf8_to_ucs2_in_partial(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)1011390840fbSDimitrij Mijoski void utf8_to_ucs2_in_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1012390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
1013390840fbSDimitrij Mijoski const unsigned char input[] = "b\u0448\uAAAA";
1014390840fbSDimitrij Mijoski const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
1015390840fbSDimitrij Mijoski static_assert(array_size(input) == 7, "");
1016390840fbSDimitrij Mijoski static_assert(array_size(expected) == 4, "");
1017390840fbSDimitrij Mijoski
1018390840fbSDimitrij Mijoski ExternT in[array_size(input)];
1019390840fbSDimitrij Mijoski InternT exp[array_size(expected)];
1020390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
1021390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
1022390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(in) == 6);
1023390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(exp) == 3);
1024390840fbSDimitrij Mijoski
1025390840fbSDimitrij Mijoski test_offsets_partial offsets[] = {
1026390840fbSDimitrij Mijoski {1, 0, 0, 0}, // no space for first CP
1027390840fbSDimitrij Mijoski
1028390840fbSDimitrij Mijoski {3, 1, 1, 1}, // no space for second CP
1029390840fbSDimitrij Mijoski {2, 2, 1, 1}, // incomplete second CP
1030390840fbSDimitrij Mijoski {2, 1, 1, 1}, // incomplete second CP, and no space for it
1031390840fbSDimitrij Mijoski
1032390840fbSDimitrij Mijoski {6, 2, 3, 2}, // no space for third CP
1033390840fbSDimitrij Mijoski {4, 3, 3, 2}, // incomplete third CP
1034390840fbSDimitrij Mijoski {5, 3, 3, 2}, // incomplete third CP
1035390840fbSDimitrij Mijoski {4, 2, 3, 2}, // incomplete third CP, and no space for it
1036390840fbSDimitrij Mijoski {5, 2, 3, 2}, // incomplete third CP, and no space for it
1037390840fbSDimitrij Mijoski };
1038390840fbSDimitrij Mijoski
1039390840fbSDimitrij Mijoski for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
1040390840fbSDimitrij Mijoski test_offsets_partial t = *it;
1041390840fbSDimitrij Mijoski InternT out[array_size(exp) - 1] = {};
1042390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1043390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1044390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
1045390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
1046390840fbSDimitrij Mijoski mbstate_t state = {};
1047390840fbSDimitrij Mijoski const ExternT* in_next = nullptr;
1048390840fbSDimitrij Mijoski InternT* out_next = nullptr;
1049390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1050390840fbSDimitrij Mijoski
1051390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1052390840fbSDimitrij Mijoski assert(res == cvt.partial);
1053390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
1054390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
1055390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1056390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
1057390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
1058390840fbSDimitrij Mijoski
1059390840fbSDimitrij Mijoski state = mbstate_t();
1060390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, t.out_size);
1061390840fbSDimitrij Mijoski assert(len >= 0);
1062390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.expected_in_next);
1063390840fbSDimitrij Mijoski }
1064390840fbSDimitrij Mijoski }
1065390840fbSDimitrij Mijoski
1066390840fbSDimitrij Mijoski template <class InternT, class ExternT>
utf8_to_ucs2_in_error(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)1067390840fbSDimitrij Mijoski void utf8_to_ucs2_in_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1068390840fbSDimitrij Mijoski const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
1069390840fbSDimitrij Mijoski const char16_t expected[] = {'b', 0x0448, 0xD700, 0xDBEA, 0xDEAA, 0};
1070390840fbSDimitrij Mijoski static_assert(array_size(input) == 11, "");
1071390840fbSDimitrij Mijoski static_assert(array_size(expected) == 6, "");
1072390840fbSDimitrij Mijoski
1073390840fbSDimitrij Mijoski ExternT in[array_size(input)];
1074390840fbSDimitrij Mijoski InternT exp[array_size(expected)];
1075390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
1076390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
1077390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(in) == 10);
1078390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(exp) == 5);
1079390840fbSDimitrij Mijoski
1080390840fbSDimitrij Mijoski // There are 5 classes of errors in UTF-8 decoding
1081390840fbSDimitrij Mijoski // 1. Missing leading byte
1082390840fbSDimitrij Mijoski // 2. Missing trailing byte
1083390840fbSDimitrij Mijoski // 3. Surrogate CP
1084390840fbSDimitrij Mijoski // 4. Overlong sequence
1085390840fbSDimitrij Mijoski // 5. CP out of Unicode range
1086390840fbSDimitrij Mijoski test_offsets_error<unsigned char> offsets[] = {
1087390840fbSDimitrij Mijoski
1088390840fbSDimitrij Mijoski // 1. Missing leading byte. We will replace the leading byte with
1089390840fbSDimitrij Mijoski // non-leading byte, such as a byte that is always invalid or a trailing
1090390840fbSDimitrij Mijoski // byte.
1091390840fbSDimitrij Mijoski
1092390840fbSDimitrij Mijoski // replace leading byte with invalid byte
1093390840fbSDimitrij Mijoski {1, 5, 0, 0, 0xFF, 0},
1094390840fbSDimitrij Mijoski {3, 5, 1, 1, 0xFF, 1},
1095390840fbSDimitrij Mijoski {6, 5, 3, 2, 0xFF, 3},
1096390840fbSDimitrij Mijoski {10, 5, 6, 3, 0xFF, 6},
1097390840fbSDimitrij Mijoski
1098390840fbSDimitrij Mijoski // replace leading byte with trailing byte
1099390840fbSDimitrij Mijoski {1, 5, 0, 0, 0b10101010, 0},
1100390840fbSDimitrij Mijoski {3, 5, 1, 1, 0b10101010, 1},
1101390840fbSDimitrij Mijoski {6, 5, 3, 2, 0b10101010, 3},
1102390840fbSDimitrij Mijoski {10, 5, 6, 3, 0b10101010, 6},
1103390840fbSDimitrij Mijoski
1104390840fbSDimitrij Mijoski // 2. Missing trailing byte. We will replace the trailing byte with
1105390840fbSDimitrij Mijoski // non-trailing byte, such as a byte that is always invalid or a leading
1106390840fbSDimitrij Mijoski // byte (simple ASCII byte in our case).
1107390840fbSDimitrij Mijoski
1108390840fbSDimitrij Mijoski // replace first trailing byte with ASCII byte
1109390840fbSDimitrij Mijoski {3, 5, 1, 1, 'z', 2},
1110390840fbSDimitrij Mijoski {6, 5, 3, 2, 'z', 4},
1111390840fbSDimitrij Mijoski {10, 5, 6, 3, 'z', 7},
1112390840fbSDimitrij Mijoski
1113390840fbSDimitrij Mijoski // replace first trailing byte with invalid byte
1114390840fbSDimitrij Mijoski {3, 5, 1, 1, 0xFF, 2},
1115390840fbSDimitrij Mijoski {6, 5, 3, 2, 0xFF, 4},
1116390840fbSDimitrij Mijoski {10, 5, 6, 3, 0xFF, 7},
1117390840fbSDimitrij Mijoski
1118390840fbSDimitrij Mijoski // replace second trailing byte with ASCII byte
1119390840fbSDimitrij Mijoski {6, 5, 3, 2, 'z', 5},
1120390840fbSDimitrij Mijoski {10, 5, 6, 3, 'z', 8},
1121390840fbSDimitrij Mijoski
1122390840fbSDimitrij Mijoski // replace second trailing byte with invalid byte
1123390840fbSDimitrij Mijoski {6, 5, 3, 2, 0xFF, 5},
1124390840fbSDimitrij Mijoski {10, 5, 6, 3, 0xFF, 8},
1125390840fbSDimitrij Mijoski
1126390840fbSDimitrij Mijoski // replace third trailing byte
1127390840fbSDimitrij Mijoski {10, 5, 6, 3, 'z', 9},
1128390840fbSDimitrij Mijoski {10, 5, 6, 3, 0xFF, 9},
1129390840fbSDimitrij Mijoski
1130390840fbSDimitrij Mijoski // 2.1 The following test-cases raise doubt whether error or partial should
1131390840fbSDimitrij Mijoski // be returned. For example, we have 4-byte sequence with valid leading
1132390840fbSDimitrij Mijoski // byte. If we hide the last byte we need to return partial. But, if the
1133390840fbSDimitrij Mijoski // second or third byte, which are visible to the call to codecvt, are
1134390840fbSDimitrij Mijoski // malformed then error should be returned.
1135390840fbSDimitrij Mijoski
1136390840fbSDimitrij Mijoski // replace first trailing byte with ASCII byte, also incomplete at end
1137390840fbSDimitrij Mijoski {5, 5, 3, 2, 'z', 4},
1138390840fbSDimitrij Mijoski {8, 5, 6, 3, 'z', 7},
1139390840fbSDimitrij Mijoski {9, 5, 6, 3, 'z', 7},
1140390840fbSDimitrij Mijoski
1141390840fbSDimitrij Mijoski // replace first trailing byte with invalid byte, also incomplete at end
1142390840fbSDimitrij Mijoski {5, 5, 3, 2, 0xFF, 4},
1143390840fbSDimitrij Mijoski {8, 5, 6, 3, 0xFF, 7},
1144390840fbSDimitrij Mijoski {9, 5, 6, 3, 0xFF, 7},
1145390840fbSDimitrij Mijoski
1146390840fbSDimitrij Mijoski // replace second trailing byte with ASCII byte, also incomplete at end
1147390840fbSDimitrij Mijoski {9, 5, 6, 3, 'z', 8},
1148390840fbSDimitrij Mijoski
1149390840fbSDimitrij Mijoski // replace second trailing byte with invalid byte, also incomplete at end
1150390840fbSDimitrij Mijoski {9, 5, 6, 3, 0xFF, 8},
1151390840fbSDimitrij Mijoski
1152390840fbSDimitrij Mijoski // 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
1153390840fbSDimitrij Mijoski // CP U+D700
1154390840fbSDimitrij Mijoski {6, 5, 3, 2, 0b10100000, 4}, // turn U+D700 into U+D800
1155390840fbSDimitrij Mijoski {6, 5, 3, 2, 0b10101100, 4}, // turn U+D700 into U+DB00
1156390840fbSDimitrij Mijoski {6, 5, 3, 2, 0b10110000, 4}, // turn U+D700 into U+DC00
1157390840fbSDimitrij Mijoski {6, 5, 3, 2, 0b10111100, 4}, // turn U+D700 into U+DF00
1158390840fbSDimitrij Mijoski
1159390840fbSDimitrij Mijoski // 4. Overlong sequence. The CPs in the input are chosen such as modifying
1160390840fbSDimitrij Mijoski // just the leading byte is enough to make them overlong, i.e. for the
1161390840fbSDimitrij Mijoski // 3-byte and 4-byte CP the second byte (first trailing) has enough leading
1162390840fbSDimitrij Mijoski // zeroes.
1163390840fbSDimitrij Mijoski {3, 5, 1, 1, 0b11000000, 1}, // make the 2-byte CP overlong
1164390840fbSDimitrij Mijoski {3, 5, 1, 1, 0b11000001, 1}, // make the 2-byte CP overlong
1165390840fbSDimitrij Mijoski {6, 5, 3, 2, 0b11100000, 3}, // make the 3-byte CP overlong
1166390840fbSDimitrij Mijoski {10, 5, 6, 3, 0b11110000, 6}, // make the 4-byte CP overlong
1167390840fbSDimitrij Mijoski
1168390840fbSDimitrij Mijoski // 5. CP above range
1169390840fbSDimitrij Mijoski // turn U+10AAAA into U+14AAAA by changing its leading byte
1170390840fbSDimitrij Mijoski {10, 5, 6, 3, 0b11110101, 6},
1171390840fbSDimitrij Mijoski // turn U+10AAAA into U+11AAAA by changing its 2nd byte
1172390840fbSDimitrij Mijoski {10, 5, 6, 3, 0b10011010, 7},
1173390840fbSDimitrij Mijoski // Don't replace anything, show full 4-byte CP U+10AAAA
1174390840fbSDimitrij Mijoski {10, 4, 6, 3, 'b', 0},
1175390840fbSDimitrij Mijoski {10, 5, 6, 3, 'b', 0},
1176390840fbSDimitrij Mijoski // Don't replace anything, show incomplete 4-byte CP at the end. It's still
1177390840fbSDimitrij Mijoski // out of UCS2 range just by seeing the first byte.
1178390840fbSDimitrij Mijoski {7, 4, 6, 3, 'b', 0}, // incomplete fourth CP
1179390840fbSDimitrij Mijoski {8, 4, 6, 3, 'b', 0}, // incomplete fourth CP
1180390840fbSDimitrij Mijoski {9, 4, 6, 3, 'b', 0}, // incomplete fourth CP
1181390840fbSDimitrij Mijoski {7, 5, 6, 3, 'b', 0}, // incomplete fourth CP
1182390840fbSDimitrij Mijoski {8, 5, 6, 3, 'b', 0}, // incomplete fourth CP
1183390840fbSDimitrij Mijoski {9, 5, 6, 3, 'b', 0}, // incomplete fourth CP
1184390840fbSDimitrij Mijoski };
1185390840fbSDimitrij Mijoski for (test_offsets_error<unsigned char>* it = begin(offsets); it != end(offsets); ++it) {
1186390840fbSDimitrij Mijoski test_offsets_error<unsigned char> t = *it;
1187390840fbSDimitrij Mijoski InternT out[array_size(exp) - 1] = {};
1188390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1189390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1190390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
1191390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
1192390840fbSDimitrij Mijoski ExternT old_char = in[t.replace_pos];
1193390840fbSDimitrij Mijoski in[t.replace_pos] = t.replace_char;
1194390840fbSDimitrij Mijoski
1195390840fbSDimitrij Mijoski mbstate_t state = {};
1196390840fbSDimitrij Mijoski const ExternT* in_next = nullptr;
1197390840fbSDimitrij Mijoski InternT* out_next = nullptr;
1198390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1199390840fbSDimitrij Mijoski
1200390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1201390840fbSDimitrij Mijoski assert(res == cvt.error);
1202390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
1203390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
1204390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1205390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
1206390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
1207390840fbSDimitrij Mijoski
1208390840fbSDimitrij Mijoski state = mbstate_t();
1209390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, t.out_size);
1210390840fbSDimitrij Mijoski assert(len >= 0);
1211390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.expected_in_next);
1212390840fbSDimitrij Mijoski
1213390840fbSDimitrij Mijoski in[t.replace_pos] = old_char;
1214390840fbSDimitrij Mijoski }
1215390840fbSDimitrij Mijoski }
1216390840fbSDimitrij Mijoski
// Runs all UTF-8 -> UCS-2 decoding checks (codecvt::in) against `cvt`.
template <class InternT, class ExternT>
void utf8_to_ucs2_in(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  // Successful conversions first.
  utf8_to_ucs2_in_ok(cvt);

  // Then the cases expected to stop early with `partial`.
  utf8_to_ucs2_in_partial(cvt);

  // Finally the cases expected to fail with `error`.
  utf8_to_ucs2_in_error(cvt);
}
1223390840fbSDimitrij Mijoski
1224390840fbSDimitrij Mijoski template <class InternT, class ExternT>
ucs2_to_utf8_out_ok(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)1225390840fbSDimitrij Mijoski void ucs2_to_utf8_out_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1226390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
1227390840fbSDimitrij Mijoski const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1228390840fbSDimitrij Mijoski const unsigned char expected[] = "b\u0448\uAAAA";
1229390840fbSDimitrij Mijoski static_assert(array_size(input) == 4, "");
1230390840fbSDimitrij Mijoski static_assert(array_size(expected) == 7, "");
1231390840fbSDimitrij Mijoski
1232390840fbSDimitrij Mijoski InternT in[array_size(input)];
1233390840fbSDimitrij Mijoski ExternT exp[array_size(expected)];
1234390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
1235390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
1236390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(in) == 3);
1237390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(exp) == 6);
1238390840fbSDimitrij Mijoski
1239390840fbSDimitrij Mijoski test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}};
1240390840fbSDimitrij Mijoski for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1241390840fbSDimitrij Mijoski test_offsets_ok t = *it;
1242390840fbSDimitrij Mijoski ExternT out[array_size(exp) - 1] = {};
1243390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1244390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1245390840fbSDimitrij Mijoski mbstate_t state = {};
1246390840fbSDimitrij Mijoski const InternT* in_next = nullptr;
1247390840fbSDimitrij Mijoski ExternT* out_next = nullptr;
1248390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1249390840fbSDimitrij Mijoski
1250390840fbSDimitrij Mijoski res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1251390840fbSDimitrij Mijoski assert(res == cvt.ok);
1252390840fbSDimitrij Mijoski assert(in_next == in + t.in_size);
1253390840fbSDimitrij Mijoski assert(out_next == out + t.out_size);
1254390840fbSDimitrij Mijoski assert(char_traits<ExternT>::compare(out, exp, t.out_size) == 0);
1255390840fbSDimitrij Mijoski if (t.out_size < array_size(out))
1256390840fbSDimitrij Mijoski assert(out[t.out_size] == 0);
1257390840fbSDimitrij Mijoski }
1258390840fbSDimitrij Mijoski }
1259390840fbSDimitrij Mijoski
1260390840fbSDimitrij Mijoski template <class InternT, class ExternT>
ucs2_to_utf8_out_partial(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)1261390840fbSDimitrij Mijoski void ucs2_to_utf8_out_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1262390840fbSDimitrij Mijoski // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
1263390840fbSDimitrij Mijoski const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1264390840fbSDimitrij Mijoski const unsigned char expected[] = "b\u0448\uAAAA";
1265390840fbSDimitrij Mijoski static_assert(array_size(input) == 4, "");
1266390840fbSDimitrij Mijoski static_assert(array_size(expected) == 7, "");
1267390840fbSDimitrij Mijoski
1268390840fbSDimitrij Mijoski InternT in[array_size(input)];
1269390840fbSDimitrij Mijoski ExternT exp[array_size(expected)];
1270390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
1271390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
1272390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(in) == 3);
1273390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(exp) == 6);
1274390840fbSDimitrij Mijoski
1275390840fbSDimitrij Mijoski test_offsets_partial offsets[] = {
1276390840fbSDimitrij Mijoski {1, 0, 0, 0}, // no space for first CP
1277390840fbSDimitrij Mijoski
1278390840fbSDimitrij Mijoski {2, 1, 1, 1}, // no space for second CP
1279390840fbSDimitrij Mijoski {2, 2, 1, 1}, // no space for second CP
1280390840fbSDimitrij Mijoski
1281390840fbSDimitrij Mijoski {3, 3, 2, 3}, // no space for third CP
1282390840fbSDimitrij Mijoski {3, 4, 2, 3}, // no space for third CP
1283390840fbSDimitrij Mijoski {3, 5, 2, 3}, // no space for third CP
1284390840fbSDimitrij Mijoski };
1285390840fbSDimitrij Mijoski for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
1286390840fbSDimitrij Mijoski test_offsets_partial t = *it;
1287390840fbSDimitrij Mijoski ExternT out[array_size(exp) - 1] = {};
1288390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1289390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1290390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
1291390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
1292390840fbSDimitrij Mijoski mbstate_t state = {};
1293390840fbSDimitrij Mijoski const InternT* in_next = nullptr;
1294390840fbSDimitrij Mijoski ExternT* out_next = nullptr;
1295390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1296390840fbSDimitrij Mijoski
1297390840fbSDimitrij Mijoski res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1298390840fbSDimitrij Mijoski assert(res == cvt.partial);
1299390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
1300390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
1301390840fbSDimitrij Mijoski assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
1302390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
1303390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
1304390840fbSDimitrij Mijoski }
1305390840fbSDimitrij Mijoski }
1306390840fbSDimitrij Mijoski
1307390840fbSDimitrij Mijoski template <class InternT, class ExternT>
ucs2_to_utf8_out_error(const std::codecvt<InternT,ExternT,mbstate_t> & cvt)1308390840fbSDimitrij Mijoski void ucs2_to_utf8_out_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1309390840fbSDimitrij Mijoski const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1310390840fbSDimitrij Mijoski const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
1311390840fbSDimitrij Mijoski static_assert(array_size(input) == 6, "");
1312390840fbSDimitrij Mijoski static_assert(array_size(expected) == 11, "");
1313390840fbSDimitrij Mijoski
1314390840fbSDimitrij Mijoski InternT in[array_size(input)];
1315390840fbSDimitrij Mijoski ExternT exp[array_size(expected)];
1316390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
1317390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
1318390840fbSDimitrij Mijoski assert(char_traits<InternT>::length(in) == 5);
1319390840fbSDimitrij Mijoski assert(char_traits<ExternT>::length(exp) == 10);
1320390840fbSDimitrij Mijoski
1321390840fbSDimitrij Mijoski test_offsets_error<InternT> offsets[] = {
1322390840fbSDimitrij Mijoski {3, 6, 0, 0, 0xD800, 0},
1323390840fbSDimitrij Mijoski {3, 6, 0, 0, 0xDBFF, 0},
1324390840fbSDimitrij Mijoski {3, 6, 0, 0, 0xDC00, 0},
1325390840fbSDimitrij Mijoski {3, 6, 0, 0, 0xDFFF, 0},
1326390840fbSDimitrij Mijoski
1327390840fbSDimitrij Mijoski {3, 6, 1, 1, 0xD800, 1},
1328390840fbSDimitrij Mijoski {3, 6, 1, 1, 0xDBFF, 1},
1329390840fbSDimitrij Mijoski {3, 6, 1, 1, 0xDC00, 1},
1330390840fbSDimitrij Mijoski {3, 6, 1, 1, 0xDFFF, 1},
1331390840fbSDimitrij Mijoski
1332390840fbSDimitrij Mijoski {3, 6, 2, 3, 0xD800, 2},
1333390840fbSDimitrij Mijoski {3, 6, 2, 3, 0xDBFF, 2},
1334390840fbSDimitrij Mijoski {3, 6, 2, 3, 0xDC00, 2},
1335390840fbSDimitrij Mijoski {3, 6, 2, 3, 0xDFFF, 2},
1336390840fbSDimitrij Mijoski
1337390840fbSDimitrij Mijoski // make the leading surrogate a trailing one
1338390840fbSDimitrij Mijoski {5, 10, 3, 6, 0xDC00, 3},
1339390840fbSDimitrij Mijoski {5, 10, 3, 6, 0xDFFF, 3},
1340390840fbSDimitrij Mijoski
1341390840fbSDimitrij Mijoski // make the trailing surrogate a leading one
1342390840fbSDimitrij Mijoski {5, 10, 3, 6, 0xD800, 4},
1343390840fbSDimitrij Mijoski {5, 10, 3, 6, 0xDBFF, 4},
1344390840fbSDimitrij Mijoski
1345390840fbSDimitrij Mijoski // make the trailing surrogate a BMP char
1346390840fbSDimitrij Mijoski {5, 10, 3, 6, 'z', 4},
1347390840fbSDimitrij Mijoski
1348390840fbSDimitrij Mijoski // don't replace anything in the test cases bellow, just show the surrogate
1349390840fbSDimitrij Mijoski // pair (fourth CP) fully or partially
1350390840fbSDimitrij Mijoski {5, 10, 3, 6, 'b', 0},
1351390840fbSDimitrij Mijoski {5, 7, 3, 6, 'b', 0}, // no space for fourth CP
1352390840fbSDimitrij Mijoski {5, 8, 3, 6, 'b', 0}, // no space for fourth CP
1353390840fbSDimitrij Mijoski {5, 9, 3, 6, 'b', 0}, // no space for fourth CP
1354390840fbSDimitrij Mijoski
1355390840fbSDimitrij Mijoski {4, 10, 3, 6, 'b', 0}, // incomplete fourth CP
1356390840fbSDimitrij Mijoski {4, 7, 3, 6, 'b', 0}, // incomplete fourth CP, and no space for it
1357390840fbSDimitrij Mijoski {4, 8, 3, 6, 'b', 0}, // incomplete fourth CP, and no space for it
1358390840fbSDimitrij Mijoski {4, 9, 3, 6, 'b', 0}, // incomplete fourth CP, and no space for it
1359390840fbSDimitrij Mijoski };
1360390840fbSDimitrij Mijoski
1361390840fbSDimitrij Mijoski for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
1362390840fbSDimitrij Mijoski test_offsets_error<InternT> t = *it;
1363390840fbSDimitrij Mijoski ExternT out[array_size(exp) - 1] = {};
1364390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1365390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1366390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
1367390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
1368390840fbSDimitrij Mijoski InternT old_char = in[t.replace_pos];
1369390840fbSDimitrij Mijoski in[t.replace_pos] = t.replace_char;
1370390840fbSDimitrij Mijoski
1371390840fbSDimitrij Mijoski mbstate_t state = {};
1372390840fbSDimitrij Mijoski const InternT* in_next = nullptr;
1373390840fbSDimitrij Mijoski ExternT* out_next = nullptr;
1374390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1375390840fbSDimitrij Mijoski
1376390840fbSDimitrij Mijoski res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1377390840fbSDimitrij Mijoski assert(res == cvt.error);
1378390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
1379390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
1380390840fbSDimitrij Mijoski assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
1381390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
1382390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
1383390840fbSDimitrij Mijoski
1384390840fbSDimitrij Mijoski in[t.replace_pos] = old_char;
1385390840fbSDimitrij Mijoski }
1386390840fbSDimitrij Mijoski }
1387390840fbSDimitrij Mijoski
// Runs all UCS-2 -> UTF-8 encoding checks (codecvt::out) against `cvt`.
template <class InternT, class ExternT>
void ucs2_to_utf8_out(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  // Successful conversions first.
  ucs2_to_utf8_out_ok(cvt);

  // Then the cases expected to stop early with `partial`.
  ucs2_to_utf8_out_partial(cvt);

  // Finally the cases expected to fail with `error`.
  ucs2_to_utf8_out_error(cvt);
}
1394390840fbSDimitrij Mijoski
// Entry point for the UTF-8 <-> UCS-2 facet checks: exercises `cvt` in the
// decoding direction (in) and then in the encoding direction (out).
template <class InternT, class ExternT>
void test_utf8_ucs2_cvt(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  // external (UTF-8) -> internal (UCS-2)
  utf8_to_ucs2_in(cvt);

  // internal (UCS-2) -> external (UTF-8)
  ucs2_to_utf8_out(cvt);
}
1400390840fbSDimitrij Mijoski
// Byte order used when serializing UTF-16 code units into a byte sequence.
enum utf16_endianess {
  utf16_big_endian,   // high byte of each code unit is written first
  utf16_little_endian // low byte of each code unit is written first
};

// Writes every code unit in [f, l) to `o` as two bytes, ordered according to
// `e`, and returns the output iterator advanced past the last byte written.
template <class Iter1, class Iter2>
Iter2 utf16_to_bytes(Iter1 f, Iter1 l, Iter2 o, utf16_endianess e) {
  // Pick the shift that selects the first emitted byte; the second byte uses
  // the complementary shift (8 <-> 0).
  const int first_shift  = (e == utf16_big_endian) ? 8 : 0;
  const int second_shift = 8 - first_shift;

  for (; f != l; ++f) {
    *o++ = (*f >> first_shift) & 0xFF;
    *o++ = (*f >> second_shift) & 0xFF;
  }
  return o;
}
1417390840fbSDimitrij Mijoski
1418390840fbSDimitrij Mijoski template <class InternT>
utf16_to_utf32_in_ok(const std::codecvt<InternT,char,mbstate_t> & cvt,utf16_endianess endianess)1419390840fbSDimitrij Mijoski void utf16_to_utf32_in_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1420390840fbSDimitrij Mijoski const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1421390840fbSDimitrij Mijoski const char32_t expected[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1422390840fbSDimitrij Mijoski static_assert(array_size(input) == 6, "");
1423390840fbSDimitrij Mijoski static_assert(array_size(expected) == 5, "");
1424390840fbSDimitrij Mijoski
1425390840fbSDimitrij Mijoski char in[array_size(input) * 2];
1426390840fbSDimitrij Mijoski InternT exp[array_size(expected)];
1427390840fbSDimitrij Mijoski utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1428390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
1429390840fbSDimitrij Mijoski
1430390840fbSDimitrij Mijoski test_offsets_ok offsets[] = {{0, 0}, {2, 1}, {4, 2}, {6, 3}, {10, 4}};
1431390840fbSDimitrij Mijoski for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1432390840fbSDimitrij Mijoski test_offsets_ok t = *it;
1433390840fbSDimitrij Mijoski InternT out[array_size(exp) - 1] = {};
1434390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1435390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1436390840fbSDimitrij Mijoski mbstate_t state = {};
1437390840fbSDimitrij Mijoski const char* in_next = nullptr;
1438390840fbSDimitrij Mijoski InternT* out_next = nullptr;
1439390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1440390840fbSDimitrij Mijoski
1441390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1442390840fbSDimitrij Mijoski assert(res == cvt.ok);
1443390840fbSDimitrij Mijoski assert(in_next == in + t.in_size);
1444390840fbSDimitrij Mijoski assert(out_next == out + t.out_size);
1445390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
1446390840fbSDimitrij Mijoski if (t.out_size < array_size(out))
1447390840fbSDimitrij Mijoski assert(out[t.out_size] == 0);
1448390840fbSDimitrij Mijoski
1449390840fbSDimitrij Mijoski state = mbstate_t();
1450390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, t.out_size);
1451390840fbSDimitrij Mijoski assert(len >= 0);
1452390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.in_size);
1453390840fbSDimitrij Mijoski }
1454390840fbSDimitrij Mijoski
1455390840fbSDimitrij Mijoski for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1456390840fbSDimitrij Mijoski test_offsets_ok t = *it;
1457390840fbSDimitrij Mijoski InternT out[array_size(exp)] = {};
1458390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1459390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1460390840fbSDimitrij Mijoski mbstate_t state = {};
1461390840fbSDimitrij Mijoski const char* in_next = nullptr;
1462390840fbSDimitrij Mijoski InternT* out_next = nullptr;
1463390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1464390840fbSDimitrij Mijoski
1465390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
1466390840fbSDimitrij Mijoski assert(res == cvt.ok);
1467390840fbSDimitrij Mijoski assert(in_next == in + t.in_size);
1468390840fbSDimitrij Mijoski assert(out_next == out + t.out_size);
1469390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
1470390840fbSDimitrij Mijoski if (t.out_size < array_size(out))
1471390840fbSDimitrij Mijoski assert(out[t.out_size] == 0);
1472390840fbSDimitrij Mijoski
1473390840fbSDimitrij Mijoski state = mbstate_t();
1474390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, array_size(out));
1475390840fbSDimitrij Mijoski assert(len >= 0);
1476390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.in_size);
1477390840fbSDimitrij Mijoski }
1478390840fbSDimitrij Mijoski }
1479390840fbSDimitrij Mijoski
1480390840fbSDimitrij Mijoski template <class InternT>
utf16_to_utf32_in_partial(const std::codecvt<InternT,char,mbstate_t> & cvt,utf16_endianess endianess)1481390840fbSDimitrij Mijoski void utf16_to_utf32_in_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1482390840fbSDimitrij Mijoski const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1483390840fbSDimitrij Mijoski const char32_t expected[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1484390840fbSDimitrij Mijoski static_assert(array_size(input) == 6, "");
1485390840fbSDimitrij Mijoski static_assert(array_size(expected) == 5, "");
1486390840fbSDimitrij Mijoski
1487390840fbSDimitrij Mijoski char in[array_size(input) * 2];
1488390840fbSDimitrij Mijoski InternT exp[array_size(expected)];
1489390840fbSDimitrij Mijoski utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1490390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
1491390840fbSDimitrij Mijoski
1492390840fbSDimitrij Mijoski test_offsets_partial offsets[] = {
1493390840fbSDimitrij Mijoski {2, 0, 0, 0}, // no space for first CP
1494390840fbSDimitrij Mijoski {1, 1, 0, 0}, // incomplete first CP
1495390840fbSDimitrij Mijoski {1, 0, 0, 0}, // incomplete first CP, and no space for it
1496390840fbSDimitrij Mijoski
1497390840fbSDimitrij Mijoski {4, 1, 2, 1}, // no space for second CP
1498390840fbSDimitrij Mijoski {3, 2, 2, 1}, // incomplete second CP
1499390840fbSDimitrij Mijoski {3, 1, 2, 1}, // incomplete second CP, and no space for it
1500390840fbSDimitrij Mijoski
1501390840fbSDimitrij Mijoski {6, 2, 4, 2}, // no space for third CP
1502390840fbSDimitrij Mijoski {5, 3, 4, 2}, // incomplete third CP
1503390840fbSDimitrij Mijoski {5, 2, 4, 2}, // incomplete third CP, and no space for it
1504390840fbSDimitrij Mijoski
1505390840fbSDimitrij Mijoski {10, 3, 6, 3}, // no space for fourth CP
1506390840fbSDimitrij Mijoski {7, 4, 6, 3}, // incomplete fourth CP
1507390840fbSDimitrij Mijoski {8, 4, 6, 3}, // incomplete fourth CP
1508390840fbSDimitrij Mijoski {9, 4, 6, 3}, // incomplete fourth CP
1509390840fbSDimitrij Mijoski {7, 3, 6, 3}, // incomplete fourth CP, and no space for it
1510390840fbSDimitrij Mijoski {8, 3, 6, 3}, // incomplete fourth CP, and no space for it
1511390840fbSDimitrij Mijoski {9, 3, 6, 3}, // incomplete fourth CP, and no space for it
1512390840fbSDimitrij Mijoski };
1513390840fbSDimitrij Mijoski
1514390840fbSDimitrij Mijoski for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
1515390840fbSDimitrij Mijoski test_offsets_partial t = *it;
1516390840fbSDimitrij Mijoski InternT out[array_size(exp) - 1] = {};
1517390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1518390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1519390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
1520390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
1521390840fbSDimitrij Mijoski mbstate_t state = {};
1522390840fbSDimitrij Mijoski const char* in_next = nullptr;
1523390840fbSDimitrij Mijoski InternT* out_next = nullptr;
1524390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1525390840fbSDimitrij Mijoski
1526390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1527390840fbSDimitrij Mijoski assert(res == cvt.partial);
1528390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
1529390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
1530390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1531390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
1532390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
1533390840fbSDimitrij Mijoski
1534390840fbSDimitrij Mijoski state = mbstate_t();
1535390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, t.out_size);
1536390840fbSDimitrij Mijoski assert(len >= 0);
1537390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.expected_in_next);
1538390840fbSDimitrij Mijoski }
1539390840fbSDimitrij Mijoski }
1540390840fbSDimitrij Mijoski
1541390840fbSDimitrij Mijoski template <class InternT>
utf16_to_utf32_in_error(const std::codecvt<InternT,char,mbstate_t> & cvt,utf16_endianess endianess)1542390840fbSDimitrij Mijoski void utf16_to_utf32_in_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1543390840fbSDimitrij Mijoski char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1544390840fbSDimitrij Mijoski const char32_t expected[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1545390840fbSDimitrij Mijoski static_assert(array_size(input) == 6, "");
1546390840fbSDimitrij Mijoski static_assert(array_size(expected) == 5, "");
1547390840fbSDimitrij Mijoski
1548390840fbSDimitrij Mijoski InternT exp[array_size(expected)];
1549390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
1550390840fbSDimitrij Mijoski
1551390840fbSDimitrij Mijoski // The only possible error in UTF-16 is unpaired surrogate code units.
1552390840fbSDimitrij Mijoski // So we replace valid code points (scalar values) with lone surrogate CU.
1553390840fbSDimitrij Mijoski test_offsets_error<char16_t> offsets[] = {
1554390840fbSDimitrij Mijoski {10, 4, 0, 0, 0xD800, 0},
1555390840fbSDimitrij Mijoski {10, 4, 0, 0, 0xDBFF, 0},
1556390840fbSDimitrij Mijoski {10, 4, 0, 0, 0xDC00, 0},
1557390840fbSDimitrij Mijoski {10, 4, 0, 0, 0xDFFF, 0},
1558390840fbSDimitrij Mijoski
1559390840fbSDimitrij Mijoski {10, 4, 2, 1, 0xD800, 1},
1560390840fbSDimitrij Mijoski {10, 4, 2, 1, 0xDBFF, 1},
1561390840fbSDimitrij Mijoski {10, 4, 2, 1, 0xDC00, 1},
1562390840fbSDimitrij Mijoski {10, 4, 2, 1, 0xDFFF, 1},
1563390840fbSDimitrij Mijoski
1564390840fbSDimitrij Mijoski {10, 4, 4, 2, 0xD800, 2},
1565390840fbSDimitrij Mijoski {10, 4, 4, 2, 0xDBFF, 2},
1566390840fbSDimitrij Mijoski {10, 4, 4, 2, 0xDC00, 2},
1567390840fbSDimitrij Mijoski {10, 4, 4, 2, 0xDFFF, 2},
1568390840fbSDimitrij Mijoski
1569390840fbSDimitrij Mijoski // make the leading surrogate a trailing one
1570390840fbSDimitrij Mijoski {10, 4, 6, 3, 0xDC00, 3},
1571390840fbSDimitrij Mijoski {10, 4, 6, 3, 0xDFFF, 3},
1572390840fbSDimitrij Mijoski
1573390840fbSDimitrij Mijoski // make the trailing surrogate a leading one
1574390840fbSDimitrij Mijoski {10, 4, 6, 3, 0xD800, 4},
1575390840fbSDimitrij Mijoski {10, 4, 6, 3, 0xDBFF, 4},
1576390840fbSDimitrij Mijoski
1577390840fbSDimitrij Mijoski // make the trailing surrogate a BMP char
1578390840fbSDimitrij Mijoski {10, 4, 6, 3, 'z', 4},
1579390840fbSDimitrij Mijoski };
1580390840fbSDimitrij Mijoski
1581390840fbSDimitrij Mijoski for (test_offsets_error<char16_t>* it = begin(offsets); it != end(offsets); ++it) {
1582390840fbSDimitrij Mijoski test_offsets_error<char16_t> t = *it;
1583390840fbSDimitrij Mijoski char in[array_size(input) * 2];
1584390840fbSDimitrij Mijoski InternT out[array_size(exp) - 1] = {};
1585390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1586390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1587390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
1588390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
1589390840fbSDimitrij Mijoski char16_t old_char = input[t.replace_pos];
1590390840fbSDimitrij Mijoski input[t.replace_pos] = t.replace_char; // replace in input, not in in
1591390840fbSDimitrij Mijoski utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1592390840fbSDimitrij Mijoski
1593390840fbSDimitrij Mijoski mbstate_t state = {};
1594390840fbSDimitrij Mijoski const char* in_next = nullptr;
1595390840fbSDimitrij Mijoski InternT* out_next = nullptr;
1596390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1597390840fbSDimitrij Mijoski
1598390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1599390840fbSDimitrij Mijoski assert(res == cvt.error);
1600390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
1601390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
1602390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1603390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
1604390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
1605390840fbSDimitrij Mijoski
1606390840fbSDimitrij Mijoski state = mbstate_t();
1607390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, t.out_size);
1608390840fbSDimitrij Mijoski assert(len >= 0);
1609390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.expected_in_next);
1610390840fbSDimitrij Mijoski
1611390840fbSDimitrij Mijoski input[t.replace_pos] = old_char;
1612390840fbSDimitrij Mijoski }
1613390840fbSDimitrij Mijoski }
1614390840fbSDimitrij Mijoski
1615390840fbSDimitrij Mijoski template <class InternT>
utf32_to_utf16_out_ok(const std::codecvt<InternT,char,mbstate_t> & cvt,utf16_endianess endianess)1616390840fbSDimitrij Mijoski void utf32_to_utf16_out_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1617390840fbSDimitrij Mijoski const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1618390840fbSDimitrij Mijoski const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1619390840fbSDimitrij Mijoski static_assert(array_size(input) == 5, "");
1620390840fbSDimitrij Mijoski static_assert(array_size(expected) == 6, "");
1621390840fbSDimitrij Mijoski
1622390840fbSDimitrij Mijoski InternT in[array_size(input)];
1623390840fbSDimitrij Mijoski char exp[array_size(expected) * 2];
1624390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
1625390840fbSDimitrij Mijoski utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
1626390840fbSDimitrij Mijoski
1627390840fbSDimitrij Mijoski test_offsets_ok offsets[] = {{0, 0}, {1, 2}, {2, 4}, {3, 6}, {4, 10}};
1628390840fbSDimitrij Mijoski for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1629390840fbSDimitrij Mijoski test_offsets_ok t = *it;
1630390840fbSDimitrij Mijoski char out[array_size(exp) - 2] = {};
1631390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1632390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1633390840fbSDimitrij Mijoski mbstate_t state = {};
1634390840fbSDimitrij Mijoski const InternT* in_next = nullptr;
1635390840fbSDimitrij Mijoski char* out_next = nullptr;
1636390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1637390840fbSDimitrij Mijoski
1638390840fbSDimitrij Mijoski res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1639390840fbSDimitrij Mijoski assert(res == cvt.ok);
1640390840fbSDimitrij Mijoski assert(in_next == in + t.in_size);
1641390840fbSDimitrij Mijoski assert(out_next == out + t.out_size);
1642390840fbSDimitrij Mijoski assert(char_traits<char>::compare(out, exp, t.out_size) == 0);
1643390840fbSDimitrij Mijoski if (t.out_size < array_size(out))
1644390840fbSDimitrij Mijoski assert(out[t.out_size] == 0);
1645390840fbSDimitrij Mijoski }
1646390840fbSDimitrij Mijoski }
1647390840fbSDimitrij Mijoski
1648390840fbSDimitrij Mijoski template <class InternT>
utf32_to_utf16_out_partial(const std::codecvt<InternT,char,mbstate_t> & cvt,utf16_endianess endianess)1649390840fbSDimitrij Mijoski void utf32_to_utf16_out_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1650390840fbSDimitrij Mijoski const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1651390840fbSDimitrij Mijoski const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1652390840fbSDimitrij Mijoski static_assert(array_size(input) == 5, "");
1653390840fbSDimitrij Mijoski static_assert(array_size(expected) == 6, "");
1654390840fbSDimitrij Mijoski
1655390840fbSDimitrij Mijoski InternT in[array_size(input)];
1656390840fbSDimitrij Mijoski char exp[array_size(expected) * 2];
1657390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
1658390840fbSDimitrij Mijoski utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
1659390840fbSDimitrij Mijoski
1660390840fbSDimitrij Mijoski test_offsets_partial offsets[] = {
1661390840fbSDimitrij Mijoski {1, 0, 0, 0}, // no space for first CP
1662390840fbSDimitrij Mijoski {1, 1, 0, 0}, // no space for first CP
1663390840fbSDimitrij Mijoski
1664390840fbSDimitrij Mijoski {2, 2, 1, 2}, // no space for second CP
1665390840fbSDimitrij Mijoski {2, 3, 1, 2}, // no space for second CP
1666390840fbSDimitrij Mijoski
1667390840fbSDimitrij Mijoski {3, 4, 2, 4}, // no space for third CP
1668390840fbSDimitrij Mijoski {3, 5, 2, 4}, // no space for third CP
1669390840fbSDimitrij Mijoski
1670390840fbSDimitrij Mijoski {4, 6, 3, 6}, // no space for fourth CP
1671390840fbSDimitrij Mijoski {4, 7, 3, 6}, // no space for fourth CP
1672390840fbSDimitrij Mijoski {4, 8, 3, 6}, // no space for fourth CP
1673390840fbSDimitrij Mijoski {4, 9, 3, 6}, // no space for fourth CP
1674390840fbSDimitrij Mijoski };
1675390840fbSDimitrij Mijoski for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
1676390840fbSDimitrij Mijoski test_offsets_partial t = *it;
1677390840fbSDimitrij Mijoski char out[array_size(exp) - 2] = {};
1678390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1679390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1680390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
1681390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
1682390840fbSDimitrij Mijoski mbstate_t state = {};
1683390840fbSDimitrij Mijoski const InternT* in_next = nullptr;
1684390840fbSDimitrij Mijoski char* out_next = nullptr;
1685390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1686390840fbSDimitrij Mijoski
1687390840fbSDimitrij Mijoski res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1688390840fbSDimitrij Mijoski assert(res == cvt.partial);
1689390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
1690390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
1691390840fbSDimitrij Mijoski assert(char_traits<char>::compare(out, exp, t.expected_out_next) == 0);
1692390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
1693390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
1694390840fbSDimitrij Mijoski }
1695390840fbSDimitrij Mijoski }
1696390840fbSDimitrij Mijoski
1697390840fbSDimitrij Mijoski template <class InternT>
utf32_to_utf16_out_error(const std::codecvt<InternT,char,mbstate_t> & cvt,utf16_endianess endianess)1698390840fbSDimitrij Mijoski void utf32_to_utf16_out_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1699390840fbSDimitrij Mijoski const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1700390840fbSDimitrij Mijoski const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1701390840fbSDimitrij Mijoski static_assert(array_size(input) == 5, "");
1702390840fbSDimitrij Mijoski static_assert(array_size(expected) == 6, "");
1703390840fbSDimitrij Mijoski
1704390840fbSDimitrij Mijoski InternT in[array_size(input)];
1705390840fbSDimitrij Mijoski char exp[array_size(expected) * 2];
1706390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
1707390840fbSDimitrij Mijoski utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
1708390840fbSDimitrij Mijoski
1709390840fbSDimitrij Mijoski test_offsets_error<InternT> offsets[] = {
1710390840fbSDimitrij Mijoski
1711390840fbSDimitrij Mijoski // Surrogate CP
1712390840fbSDimitrij Mijoski {4, 10, 0, 0, 0xD800, 0},
1713390840fbSDimitrij Mijoski {4, 10, 1, 2, 0xDBFF, 1},
1714390840fbSDimitrij Mijoski {4, 10, 2, 4, 0xDC00, 2},
1715390840fbSDimitrij Mijoski {4, 10, 3, 6, 0xDFFF, 3},
1716390840fbSDimitrij Mijoski
1717390840fbSDimitrij Mijoski // CP out of range
1718390840fbSDimitrij Mijoski {4, 10, 0, 0, 0x00110000, 0},
1719390840fbSDimitrij Mijoski {4, 10, 1, 2, 0x00110000, 1},
1720390840fbSDimitrij Mijoski {4, 10, 2, 4, 0x00110000, 2},
1721390840fbSDimitrij Mijoski {4, 10, 3, 6, 0x00110000, 3}};
1722390840fbSDimitrij Mijoski
1723390840fbSDimitrij Mijoski for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
1724390840fbSDimitrij Mijoski test_offsets_error<InternT> t = *it;
1725390840fbSDimitrij Mijoski char out[array_size(exp) - 2] = {};
1726390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1727390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1728390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
1729390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
1730390840fbSDimitrij Mijoski InternT old_char = in[t.replace_pos];
1731390840fbSDimitrij Mijoski in[t.replace_pos] = t.replace_char;
1732390840fbSDimitrij Mijoski
1733390840fbSDimitrij Mijoski mbstate_t state = {};
1734390840fbSDimitrij Mijoski const InternT* in_next = nullptr;
1735390840fbSDimitrij Mijoski char* out_next = nullptr;
1736390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1737390840fbSDimitrij Mijoski
1738390840fbSDimitrij Mijoski res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1739390840fbSDimitrij Mijoski assert(res == cvt.error);
1740390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
1741390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
1742390840fbSDimitrij Mijoski assert(char_traits<char>::compare(out, exp, t.expected_out_next) == 0);
1743390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
1744390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
1745390840fbSDimitrij Mijoski
1746390840fbSDimitrij Mijoski in[t.replace_pos] = old_char;
1747390840fbSDimitrij Mijoski }
1748390840fbSDimitrij Mijoski }
1749390840fbSDimitrij Mijoski
1750390840fbSDimitrij Mijoski template <class InternT>
test_utf16_utf32_cvt(const std::codecvt<InternT,char,mbstate_t> & cvt,utf16_endianess endianess)1751390840fbSDimitrij Mijoski void test_utf16_utf32_cvt(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1752390840fbSDimitrij Mijoski utf16_to_utf32_in_ok(cvt, endianess);
1753390840fbSDimitrij Mijoski utf16_to_utf32_in_partial(cvt, endianess);
1754390840fbSDimitrij Mijoski utf16_to_utf32_in_error(cvt, endianess);
1755390840fbSDimitrij Mijoski utf32_to_utf16_out_ok(cvt, endianess);
1756390840fbSDimitrij Mijoski utf32_to_utf16_out_partial(cvt, endianess);
1757390840fbSDimitrij Mijoski utf32_to_utf16_out_error(cvt, endianess);
1758390840fbSDimitrij Mijoski }
1759390840fbSDimitrij Mijoski
1760390840fbSDimitrij Mijoski template <class InternT>
utf16_to_ucs2_in_ok(const std::codecvt<InternT,char,mbstate_t> & cvt,utf16_endianess endianess)1761390840fbSDimitrij Mijoski void utf16_to_ucs2_in_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1762390840fbSDimitrij Mijoski const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1763390840fbSDimitrij Mijoski const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
1764390840fbSDimitrij Mijoski static_assert(array_size(input) == 4, "");
1765390840fbSDimitrij Mijoski static_assert(array_size(expected) == 4, "");
1766390840fbSDimitrij Mijoski
1767390840fbSDimitrij Mijoski char in[array_size(input) * 2];
1768390840fbSDimitrij Mijoski InternT exp[array_size(expected)];
1769390840fbSDimitrij Mijoski utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1770390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
1771390840fbSDimitrij Mijoski
1772390840fbSDimitrij Mijoski test_offsets_ok offsets[] = {{0, 0}, {2, 1}, {4, 2}, {6, 3}};
1773390840fbSDimitrij Mijoski for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1774390840fbSDimitrij Mijoski test_offsets_ok t = *it;
1775390840fbSDimitrij Mijoski InternT out[array_size(exp) - 1] = {};
1776390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1777390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1778390840fbSDimitrij Mijoski mbstate_t state = {};
1779390840fbSDimitrij Mijoski const char* in_next = nullptr;
1780390840fbSDimitrij Mijoski InternT* out_next = nullptr;
1781390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1782390840fbSDimitrij Mijoski
1783390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1784390840fbSDimitrij Mijoski assert(res == cvt.ok);
1785390840fbSDimitrij Mijoski assert(in_next == in + t.in_size);
1786390840fbSDimitrij Mijoski assert(out_next == out + t.out_size);
1787390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
1788390840fbSDimitrij Mijoski if (t.out_size < array_size(out))
1789390840fbSDimitrij Mijoski assert(out[t.out_size] == 0);
1790390840fbSDimitrij Mijoski
1791390840fbSDimitrij Mijoski state = mbstate_t();
1792390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, t.out_size);
1793390840fbSDimitrij Mijoski assert(len >= 0);
1794390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.in_size);
1795390840fbSDimitrij Mijoski }
1796390840fbSDimitrij Mijoski
1797390840fbSDimitrij Mijoski for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1798390840fbSDimitrij Mijoski test_offsets_ok t = *it;
1799390840fbSDimitrij Mijoski InternT out[array_size(exp)] = {};
1800390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1801390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1802390840fbSDimitrij Mijoski mbstate_t state = {};
1803390840fbSDimitrij Mijoski const char* in_next = nullptr;
1804390840fbSDimitrij Mijoski InternT* out_next = nullptr;
1805390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1806390840fbSDimitrij Mijoski
1807390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
1808390840fbSDimitrij Mijoski assert(res == cvt.ok);
1809390840fbSDimitrij Mijoski assert(in_next == in + t.in_size);
1810390840fbSDimitrij Mijoski assert(out_next == out + t.out_size);
1811390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
1812390840fbSDimitrij Mijoski if (t.out_size < array_size(out))
1813390840fbSDimitrij Mijoski assert(out[t.out_size] == 0);
1814390840fbSDimitrij Mijoski
1815390840fbSDimitrij Mijoski state = mbstate_t();
1816390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, array_size(out));
1817390840fbSDimitrij Mijoski assert(len >= 0);
1818390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.in_size);
1819390840fbSDimitrij Mijoski }
1820390840fbSDimitrij Mijoski }
1821390840fbSDimitrij Mijoski
1822390840fbSDimitrij Mijoski template <class InternT>
utf16_to_ucs2_in_partial(const std::codecvt<InternT,char,mbstate_t> & cvt,utf16_endianess endianess)1823390840fbSDimitrij Mijoski void utf16_to_ucs2_in_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1824390840fbSDimitrij Mijoski const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1825390840fbSDimitrij Mijoski const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
1826390840fbSDimitrij Mijoski static_assert(array_size(input) == 4, "");
1827390840fbSDimitrij Mijoski static_assert(array_size(expected) == 4, "");
1828390840fbSDimitrij Mijoski
1829390840fbSDimitrij Mijoski char in[array_size(input) * 2];
1830390840fbSDimitrij Mijoski InternT exp[array_size(expected)];
1831390840fbSDimitrij Mijoski utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1832390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
1833390840fbSDimitrij Mijoski
1834390840fbSDimitrij Mijoski test_offsets_partial offsets[] = {
1835390840fbSDimitrij Mijoski {2, 0, 0, 0}, // no space for first CP
1836390840fbSDimitrij Mijoski {1, 1, 0, 0}, // incomplete first CP
1837390840fbSDimitrij Mijoski {1, 0, 0, 0}, // incomplete first CP, and no space for it
1838390840fbSDimitrij Mijoski
1839390840fbSDimitrij Mijoski {4, 1, 2, 1}, // no space for second CP
1840390840fbSDimitrij Mijoski {3, 2, 2, 1}, // incomplete second CP
1841390840fbSDimitrij Mijoski {3, 1, 2, 1}, // incomplete second CP, and no space for it
1842390840fbSDimitrij Mijoski
1843390840fbSDimitrij Mijoski {6, 2, 4, 2}, // no space for third CP
1844390840fbSDimitrij Mijoski {5, 3, 4, 2}, // incomplete third CP
1845390840fbSDimitrij Mijoski {5, 2, 4, 2}, // incomplete third CP, and no space for it
1846390840fbSDimitrij Mijoski };
1847390840fbSDimitrij Mijoski
1848390840fbSDimitrij Mijoski for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
1849390840fbSDimitrij Mijoski test_offsets_partial t = *it;
1850390840fbSDimitrij Mijoski InternT out[array_size(exp) - 1] = {};
1851390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1852390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1853390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
1854390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
1855390840fbSDimitrij Mijoski mbstate_t state = {};
1856390840fbSDimitrij Mijoski const char* in_next = nullptr;
1857390840fbSDimitrij Mijoski InternT* out_next = nullptr;
1858390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1859390840fbSDimitrij Mijoski
1860390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1861390840fbSDimitrij Mijoski assert(res == cvt.partial);
1862390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
1863390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
1864390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1865390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
1866390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
1867390840fbSDimitrij Mijoski
1868390840fbSDimitrij Mijoski state = mbstate_t();
1869390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, t.out_size);
1870390840fbSDimitrij Mijoski assert(len >= 0);
1871390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.expected_in_next);
1872390840fbSDimitrij Mijoski }
1873390840fbSDimitrij Mijoski }
1874390840fbSDimitrij Mijoski
1875390840fbSDimitrij Mijoski template <class InternT>
utf16_to_ucs2_in_error(const std::codecvt<InternT,char,mbstate_t> & cvt,utf16_endianess endianess)1876390840fbSDimitrij Mijoski void utf16_to_ucs2_in_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1877390840fbSDimitrij Mijoski char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1878390840fbSDimitrij Mijoski const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1879390840fbSDimitrij Mijoski static_assert(array_size(input) == 6, "");
1880390840fbSDimitrij Mijoski static_assert(array_size(expected) == 6, "");
1881390840fbSDimitrij Mijoski
1882390840fbSDimitrij Mijoski InternT exp[array_size(expected)];
1883390840fbSDimitrij Mijoski copy(begin(expected), end(expected), begin(exp));
1884390840fbSDimitrij Mijoski
1885390840fbSDimitrij Mijoski // The only possible error in UTF-16 is unpaired surrogate code units.
1886390840fbSDimitrij Mijoski // Additionally, because the target encoding is UCS-2, a proper pair of
1887390840fbSDimitrij Mijoski // surrogates is also error. Simply, any surrogate CU is error.
1888390840fbSDimitrij Mijoski test_offsets_error<char16_t> offsets[] = {
1889390840fbSDimitrij Mijoski {6, 3, 0, 0, 0xD800, 0},
1890390840fbSDimitrij Mijoski {6, 3, 0, 0, 0xDBFF, 0},
1891390840fbSDimitrij Mijoski {6, 3, 0, 0, 0xDC00, 0},
1892390840fbSDimitrij Mijoski {6, 3, 0, 0, 0xDFFF, 0},
1893390840fbSDimitrij Mijoski
1894390840fbSDimitrij Mijoski {6, 3, 2, 1, 0xD800, 1},
1895390840fbSDimitrij Mijoski {6, 3, 2, 1, 0xDBFF, 1},
1896390840fbSDimitrij Mijoski {6, 3, 2, 1, 0xDC00, 1},
1897390840fbSDimitrij Mijoski {6, 3, 2, 1, 0xDFFF, 1},
1898390840fbSDimitrij Mijoski
1899390840fbSDimitrij Mijoski {6, 3, 4, 2, 0xD800, 2},
1900390840fbSDimitrij Mijoski {6, 3, 4, 2, 0xDBFF, 2},
1901390840fbSDimitrij Mijoski {6, 3, 4, 2, 0xDC00, 2},
1902390840fbSDimitrij Mijoski {6, 3, 4, 2, 0xDFFF, 2},
1903390840fbSDimitrij Mijoski
1904390840fbSDimitrij Mijoski // make the leading surrogate a trailing one
1905390840fbSDimitrij Mijoski {10, 5, 6, 3, 0xDC00, 3},
1906390840fbSDimitrij Mijoski {10, 5, 6, 3, 0xDFFF, 3},
1907390840fbSDimitrij Mijoski
1908390840fbSDimitrij Mijoski // make the trailing surrogate a leading one
1909390840fbSDimitrij Mijoski {10, 5, 6, 3, 0xD800, 4},
1910390840fbSDimitrij Mijoski {10, 5, 6, 3, 0xDBFF, 4},
1911390840fbSDimitrij Mijoski
1912390840fbSDimitrij Mijoski // make the trailing surrogate a BMP char
1913390840fbSDimitrij Mijoski {10, 5, 6, 3, 'z', 4},
1914390840fbSDimitrij Mijoski
1915390840fbSDimitrij Mijoski // don't replace anything in the test cases bellow, just show the surrogate
1916390840fbSDimitrij Mijoski // pair (fourth CP) fully or partially (just the first surrogate)
1917390840fbSDimitrij Mijoski {10, 5, 6, 3, 'b', 0},
1918390840fbSDimitrij Mijoski {8, 5, 6, 3, 'b', 0},
1919390840fbSDimitrij Mijoski {9, 5, 6, 3, 'b', 0},
1920390840fbSDimitrij Mijoski
1921390840fbSDimitrij Mijoski {10, 4, 6, 3, 'b', 0},
1922390840fbSDimitrij Mijoski {8, 4, 6, 3, 'b', 0},
1923390840fbSDimitrij Mijoski {9, 4, 6, 3, 'b', 0},
1924390840fbSDimitrij Mijoski };
1925390840fbSDimitrij Mijoski
1926390840fbSDimitrij Mijoski for (test_offsets_error<char16_t>* it = begin(offsets); it != end(offsets); ++it) {
1927390840fbSDimitrij Mijoski test_offsets_error<char16_t> t = *it;
1928390840fbSDimitrij Mijoski char in[array_size(input) * 2];
1929390840fbSDimitrij Mijoski InternT out[array_size(exp) - 1] = {};
1930390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1931390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1932390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
1933390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
1934390840fbSDimitrij Mijoski char16_t old_char = input[t.replace_pos];
1935390840fbSDimitrij Mijoski input[t.replace_pos] = t.replace_char; // replace in input, not in in
1936390840fbSDimitrij Mijoski utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1937390840fbSDimitrij Mijoski
1938390840fbSDimitrij Mijoski mbstate_t state = {};
1939390840fbSDimitrij Mijoski const char* in_next = nullptr;
1940390840fbSDimitrij Mijoski InternT* out_next = nullptr;
1941390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1942390840fbSDimitrij Mijoski
1943390840fbSDimitrij Mijoski res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1944390840fbSDimitrij Mijoski assert(res == cvt.error);
1945390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
1946390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
1947390840fbSDimitrij Mijoski assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1948390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
1949390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
1950390840fbSDimitrij Mijoski
1951390840fbSDimitrij Mijoski state = mbstate_t();
1952390840fbSDimitrij Mijoski int len = cvt.length(state, in, in + t.in_size, t.out_size);
1953390840fbSDimitrij Mijoski assert(len >= 0);
1954390840fbSDimitrij Mijoski assert(static_cast<size_t>(len) == t.expected_in_next);
1955390840fbSDimitrij Mijoski
1956390840fbSDimitrij Mijoski input[t.replace_pos] = old_char;
1957390840fbSDimitrij Mijoski }
1958390840fbSDimitrij Mijoski }
1959390840fbSDimitrij Mijoski
1960390840fbSDimitrij Mijoski template <class InternT>
ucs2_to_utf16_out_ok(const std::codecvt<InternT,char,mbstate_t> & cvt,utf16_endianess endianess)1961390840fbSDimitrij Mijoski void ucs2_to_utf16_out_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1962390840fbSDimitrij Mijoski const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1963390840fbSDimitrij Mijoski const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
1964390840fbSDimitrij Mijoski static_assert(array_size(input) == 4, "");
1965390840fbSDimitrij Mijoski static_assert(array_size(expected) == 4, "");
1966390840fbSDimitrij Mijoski
1967390840fbSDimitrij Mijoski InternT in[array_size(input)];
1968390840fbSDimitrij Mijoski char exp[array_size(expected) * 2];
1969390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
1970390840fbSDimitrij Mijoski utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
1971390840fbSDimitrij Mijoski
1972390840fbSDimitrij Mijoski test_offsets_ok offsets[] = {{0, 0}, {1, 2}, {2, 4}, {3, 6}};
1973390840fbSDimitrij Mijoski for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1974390840fbSDimitrij Mijoski test_offsets_ok t = *it;
1975390840fbSDimitrij Mijoski char out[array_size(exp) - 2] = {};
1976390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
1977390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
1978390840fbSDimitrij Mijoski mbstate_t state = {};
1979390840fbSDimitrij Mijoski const InternT* in_next = nullptr;
1980390840fbSDimitrij Mijoski char* out_next = nullptr;
1981390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
1982390840fbSDimitrij Mijoski
1983390840fbSDimitrij Mijoski res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1984390840fbSDimitrij Mijoski assert(res == cvt.ok);
1985390840fbSDimitrij Mijoski assert(in_next == in + t.in_size);
1986390840fbSDimitrij Mijoski assert(out_next == out + t.out_size);
1987390840fbSDimitrij Mijoski assert(char_traits<char>::compare(out, exp, t.out_size) == 0);
1988390840fbSDimitrij Mijoski if (t.out_size < array_size(out))
1989390840fbSDimitrij Mijoski assert(out[t.out_size] == 0);
1990390840fbSDimitrij Mijoski }
1991390840fbSDimitrij Mijoski }
1992390840fbSDimitrij Mijoski
1993390840fbSDimitrij Mijoski template <class InternT>
ucs2_to_utf16_out_partial(const std::codecvt<InternT,char,mbstate_t> & cvt,utf16_endianess endianess)1994390840fbSDimitrij Mijoski void ucs2_to_utf16_out_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1995390840fbSDimitrij Mijoski const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1996390840fbSDimitrij Mijoski const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
1997390840fbSDimitrij Mijoski static_assert(array_size(input) == 4, "");
1998390840fbSDimitrij Mijoski static_assert(array_size(expected) == 4, "");
1999390840fbSDimitrij Mijoski
2000390840fbSDimitrij Mijoski InternT in[array_size(input)];
2001390840fbSDimitrij Mijoski char exp[array_size(expected) * 2];
2002390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
2003390840fbSDimitrij Mijoski utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
2004390840fbSDimitrij Mijoski
2005390840fbSDimitrij Mijoski test_offsets_partial offsets[] = {
2006390840fbSDimitrij Mijoski {1, 0, 0, 0}, // no space for first CP
2007390840fbSDimitrij Mijoski {1, 1, 0, 0}, // no space for first CP
2008390840fbSDimitrij Mijoski
2009390840fbSDimitrij Mijoski {2, 2, 1, 2}, // no space for second CP
2010390840fbSDimitrij Mijoski {2, 3, 1, 2}, // no space for second CP
2011390840fbSDimitrij Mijoski
2012390840fbSDimitrij Mijoski {3, 4, 2, 4}, // no space for third CP
2013390840fbSDimitrij Mijoski {3, 5, 2, 4}, // no space for third CP
2014390840fbSDimitrij Mijoski };
2015390840fbSDimitrij Mijoski for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
2016390840fbSDimitrij Mijoski test_offsets_partial t = *it;
2017390840fbSDimitrij Mijoski char out[array_size(exp) - 2] = {};
2018390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
2019390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
2020390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
2021390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
2022390840fbSDimitrij Mijoski mbstate_t state = {};
2023390840fbSDimitrij Mijoski const InternT* in_next = nullptr;
2024390840fbSDimitrij Mijoski char* out_next = nullptr;
2025390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
2026390840fbSDimitrij Mijoski
2027390840fbSDimitrij Mijoski res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
2028390840fbSDimitrij Mijoski assert(res == cvt.partial);
2029390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
2030390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
2031390840fbSDimitrij Mijoski assert(char_traits<char>::compare(out, exp, t.expected_out_next) == 0);
2032390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
2033390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
2034390840fbSDimitrij Mijoski }
2035390840fbSDimitrij Mijoski }
2036390840fbSDimitrij Mijoski
2037390840fbSDimitrij Mijoski template <class InternT>
ucs2_to_utf16_out_error(const std::codecvt<InternT,char,mbstate_t> & cvt,utf16_endianess endianess)2038390840fbSDimitrij Mijoski void ucs2_to_utf16_out_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
2039390840fbSDimitrij Mijoski const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
2040390840fbSDimitrij Mijoski const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
2041390840fbSDimitrij Mijoski static_assert(array_size(input) == 6, "");
2042390840fbSDimitrij Mijoski static_assert(array_size(expected) == 6, "");
2043390840fbSDimitrij Mijoski
2044390840fbSDimitrij Mijoski InternT in[array_size(input)];
2045390840fbSDimitrij Mijoski char exp[array_size(expected) * 2];
2046390840fbSDimitrij Mijoski copy(begin(input), end(input), begin(in));
2047390840fbSDimitrij Mijoski utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
2048390840fbSDimitrij Mijoski
2049390840fbSDimitrij Mijoski test_offsets_error<InternT> offsets[] = {
2050390840fbSDimitrij Mijoski {3, 6, 0, 0, 0xD800, 0},
2051390840fbSDimitrij Mijoski {3, 6, 0, 0, 0xDBFF, 0},
2052390840fbSDimitrij Mijoski {3, 6, 0, 0, 0xDC00, 0},
2053390840fbSDimitrij Mijoski {3, 6, 0, 0, 0xDFFF, 0},
2054390840fbSDimitrij Mijoski
2055390840fbSDimitrij Mijoski {3, 6, 1, 2, 0xD800, 1},
2056390840fbSDimitrij Mijoski {3, 6, 1, 2, 0xDBFF, 1},
2057390840fbSDimitrij Mijoski {3, 6, 1, 2, 0xDC00, 1},
2058390840fbSDimitrij Mijoski {3, 6, 1, 2, 0xDFFF, 1},
2059390840fbSDimitrij Mijoski
2060390840fbSDimitrij Mijoski {3, 6, 2, 4, 0xD800, 2},
2061390840fbSDimitrij Mijoski {3, 6, 2, 4, 0xDBFF, 2},
2062390840fbSDimitrij Mijoski {3, 6, 2, 4, 0xDC00, 2},
2063390840fbSDimitrij Mijoski {3, 6, 2, 4, 0xDFFF, 2},
2064390840fbSDimitrij Mijoski
2065390840fbSDimitrij Mijoski // make the leading surrogate a trailing one
2066390840fbSDimitrij Mijoski {5, 10, 3, 6, 0xDC00, 3},
2067390840fbSDimitrij Mijoski {5, 10, 3, 6, 0xDFFF, 3},
2068390840fbSDimitrij Mijoski
2069390840fbSDimitrij Mijoski // make the trailing surrogate a leading one
2070390840fbSDimitrij Mijoski {5, 10, 3, 6, 0xD800, 4},
2071390840fbSDimitrij Mijoski {5, 10, 3, 6, 0xDBFF, 4},
2072390840fbSDimitrij Mijoski
2073390840fbSDimitrij Mijoski // make the trailing surrogate a BMP char
2074390840fbSDimitrij Mijoski {5, 10, 3, 6, 'z', 4},
2075390840fbSDimitrij Mijoski
2076390840fbSDimitrij Mijoski // don't replace anything in the test cases bellow, just show the surrogate
2077390840fbSDimitrij Mijoski // pair (fourth CP) fully or partially (just the first surrogate)
2078390840fbSDimitrij Mijoski {5, 10, 3, 6, 'b', 0},
2079390840fbSDimitrij Mijoski {5, 8, 3, 6, 'b', 0},
2080390840fbSDimitrij Mijoski {5, 9, 3, 6, 'b', 0},
2081390840fbSDimitrij Mijoski
2082390840fbSDimitrij Mijoski {4, 10, 3, 6, 'b', 0},
2083390840fbSDimitrij Mijoski {4, 8, 3, 6, 'b', 0},
2084390840fbSDimitrij Mijoski {4, 9, 3, 6, 'b', 0},
2085390840fbSDimitrij Mijoski };
2086390840fbSDimitrij Mijoski
2087390840fbSDimitrij Mijoski for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
2088390840fbSDimitrij Mijoski test_offsets_error<InternT> t = *it;
2089390840fbSDimitrij Mijoski char out[array_size(exp) - 2] = {};
2090390840fbSDimitrij Mijoski assert(t.in_size <= array_size(in));
2091390840fbSDimitrij Mijoski assert(t.out_size <= array_size(out));
2092390840fbSDimitrij Mijoski assert(t.expected_in_next <= t.in_size);
2093390840fbSDimitrij Mijoski assert(t.expected_out_next <= t.out_size);
2094390840fbSDimitrij Mijoski InternT old_char = in[t.replace_pos];
2095390840fbSDimitrij Mijoski in[t.replace_pos] = t.replace_char;
2096390840fbSDimitrij Mijoski
2097390840fbSDimitrij Mijoski mbstate_t state = {};
2098390840fbSDimitrij Mijoski const InternT* in_next = nullptr;
2099390840fbSDimitrij Mijoski char* out_next = nullptr;
2100390840fbSDimitrij Mijoski codecvt_base::result res = codecvt_base::ok;
2101390840fbSDimitrij Mijoski
2102390840fbSDimitrij Mijoski res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
2103390840fbSDimitrij Mijoski assert(res == cvt.error);
2104390840fbSDimitrij Mijoski assert(in_next == in + t.expected_in_next);
2105390840fbSDimitrij Mijoski assert(out_next == out + t.expected_out_next);
2106390840fbSDimitrij Mijoski assert(char_traits<char>::compare(out, exp, t.expected_out_next) == 0);
2107390840fbSDimitrij Mijoski if (t.expected_out_next < array_size(out))
2108390840fbSDimitrij Mijoski assert(out[t.expected_out_next] == 0);
2109390840fbSDimitrij Mijoski
2110390840fbSDimitrij Mijoski in[t.replace_pos] = old_char;
2111390840fbSDimitrij Mijoski }
2112390840fbSDimitrij Mijoski }
2113390840fbSDimitrij Mijoski
2114390840fbSDimitrij Mijoski template <class InternT>
test_utf16_ucs2_cvt(const std::codecvt<InternT,char,mbstate_t> & cvt,utf16_endianess endianess)2115390840fbSDimitrij Mijoski void test_utf16_ucs2_cvt(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
2116390840fbSDimitrij Mijoski utf16_to_ucs2_in_ok(cvt, endianess);
2117390840fbSDimitrij Mijoski utf16_to_ucs2_in_partial(cvt, endianess);
2118390840fbSDimitrij Mijoski utf16_to_ucs2_in_error(cvt, endianess);
2119390840fbSDimitrij Mijoski ucs2_to_utf16_out_ok(cvt, endianess);
2120390840fbSDimitrij Mijoski ucs2_to_utf16_out_partial(cvt, endianess);
2121390840fbSDimitrij Mijoski ucs2_to_utf16_out_error(cvt, endianess);
2122390840fbSDimitrij Mijoski }
2123390840fbSDimitrij Mijoski
2124390840fbSDimitrij Mijoski using std::codecvt;
2125390840fbSDimitrij Mijoski using std::codecvt_utf16;
2126390840fbSDimitrij Mijoski using std::codecvt_utf8;
2127390840fbSDimitrij Mijoski using std::codecvt_utf8_utf16;
2128390840fbSDimitrij Mijoski using std::has_facet;
2129390840fbSDimitrij Mijoski using std::locale;
2130390840fbSDimitrij Mijoski using std::use_facet;
2131390840fbSDimitrij Mijoski
test_utf8_utf32_codecvts()2132390840fbSDimitrij Mijoski void test_utf8_utf32_codecvts() {
2133390840fbSDimitrij Mijoski typedef codecvt<char32_t, char, mbstate_t> codecvt_c32;
2134390840fbSDimitrij Mijoski const locale& loc_c = locale::classic();
2135390840fbSDimitrij Mijoski assert(has_facet<codecvt_c32>(loc_c));
2136390840fbSDimitrij Mijoski
2137390840fbSDimitrij Mijoski const codecvt_c32& cvt = use_facet<codecvt_c32>(loc_c);
2138390840fbSDimitrij Mijoski test_utf8_utf32_cvt(cvt);
2139390840fbSDimitrij Mijoski
2140390840fbSDimitrij Mijoski codecvt_utf8<char32_t> cvt2;
2141390840fbSDimitrij Mijoski test_utf8_utf32_cvt(cvt2);
2142390840fbSDimitrij Mijoski
2143390840fbSDimitrij Mijoski #if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && !defined(TEST_SHORT_WCHAR)
2144390840fbSDimitrij Mijoski codecvt_utf8<wchar_t> cvt3;
2145390840fbSDimitrij Mijoski test_utf8_utf32_cvt(cvt3);
2146390840fbSDimitrij Mijoski #endif
2147390840fbSDimitrij Mijoski
2148390840fbSDimitrij Mijoski #ifndef TEST_HAS_NO_CHAR8_T
2149390840fbSDimitrij Mijoski typedef codecvt<char32_t, char8_t, mbstate_t> codecvt_c32_c8;
2150390840fbSDimitrij Mijoski assert(has_facet<codecvt_c32_c8>(loc_c));
2151390840fbSDimitrij Mijoski const codecvt_c32_c8& cvt4 = use_facet<codecvt_c32_c8>(loc_c);
2152390840fbSDimitrij Mijoski test_utf8_utf32_cvt(cvt4);
2153390840fbSDimitrij Mijoski #endif
2154390840fbSDimitrij Mijoski }
2155390840fbSDimitrij Mijoski
test_utf8_utf16_codecvts()2156390840fbSDimitrij Mijoski void test_utf8_utf16_codecvts() {
2157390840fbSDimitrij Mijoski typedef codecvt<char16_t, char, mbstate_t> codecvt_c16;
2158390840fbSDimitrij Mijoski const locale& loc_c = locale::classic();
2159390840fbSDimitrij Mijoski assert(has_facet<codecvt_c16>(loc_c));
2160390840fbSDimitrij Mijoski
2161390840fbSDimitrij Mijoski const codecvt_c16& cvt = use_facet<codecvt_c16>(loc_c);
2162390840fbSDimitrij Mijoski test_utf8_utf16_cvt(cvt);
2163390840fbSDimitrij Mijoski
2164390840fbSDimitrij Mijoski codecvt_utf8_utf16<char16_t> cvt2;
2165390840fbSDimitrij Mijoski test_utf8_utf16_cvt(cvt2);
2166390840fbSDimitrij Mijoski
2167390840fbSDimitrij Mijoski codecvt_utf8_utf16<char32_t> cvt3;
2168390840fbSDimitrij Mijoski test_utf8_utf16_cvt(cvt3);
2169390840fbSDimitrij Mijoski
2170390840fbSDimitrij Mijoski #ifndef TEST_HAS_NO_WIDE_CHARACTERS
2171390840fbSDimitrij Mijoski codecvt_utf8_utf16<wchar_t> cvt4;
2172390840fbSDimitrij Mijoski test_utf8_utf16_cvt(cvt4);
2173390840fbSDimitrij Mijoski #endif
2174390840fbSDimitrij Mijoski
2175390840fbSDimitrij Mijoski #ifndef TEST_HAS_NO_CHAR8_T
2176390840fbSDimitrij Mijoski typedef codecvt<char16_t, char8_t, mbstate_t> codecvt_c16_c8;
2177390840fbSDimitrij Mijoski assert(has_facet<codecvt_c16_c8>(loc_c));
2178390840fbSDimitrij Mijoski const codecvt_c16_c8& cvt5 = use_facet<codecvt_c16_c8>(loc_c);
2179390840fbSDimitrij Mijoski test_utf8_utf16_cvt(cvt5);
2180390840fbSDimitrij Mijoski #endif
2181390840fbSDimitrij Mijoski }
2182390840fbSDimitrij Mijoski
test_utf8_ucs2_codecvts()2183390840fbSDimitrij Mijoski void test_utf8_ucs2_codecvts() {
2184390840fbSDimitrij Mijoski codecvt_utf8<char16_t> cvt;
2185390840fbSDimitrij Mijoski test_utf8_ucs2_cvt(cvt);
2186390840fbSDimitrij Mijoski
2187390840fbSDimitrij Mijoski #if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && defined(TEST_SHORT_WCHAR)
2188390840fbSDimitrij Mijoski codecvt_utf8<wchar_t> cvt2;
2189390840fbSDimitrij Mijoski test_utf8_ucs2_cvt(cvt2);
2190390840fbSDimitrij Mijoski #endif
2191390840fbSDimitrij Mijoski }
2192390840fbSDimitrij Mijoski
test_utf16_utf32_codecvts()2193390840fbSDimitrij Mijoski void test_utf16_utf32_codecvts() {
2194390840fbSDimitrij Mijoski codecvt_utf16<char32_t> cvt;
2195390840fbSDimitrij Mijoski test_utf16_utf32_cvt(cvt, utf16_big_endian);
2196390840fbSDimitrij Mijoski
2197390840fbSDimitrij Mijoski codecvt_utf16<char32_t, 0x10FFFF, std::little_endian> cvt2;
2198390840fbSDimitrij Mijoski test_utf16_utf32_cvt(cvt2, utf16_little_endian);
2199390840fbSDimitrij Mijoski
2200390840fbSDimitrij Mijoski #if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && !defined(TEST_SHORT_WCHAR)
2201390840fbSDimitrij Mijoski codecvt_utf16<wchar_t> cvt3;
2202390840fbSDimitrij Mijoski test_utf16_utf32_cvt(cvt3, utf16_big_endian);
2203390840fbSDimitrij Mijoski
2204390840fbSDimitrij Mijoski codecvt_utf16<wchar_t, 0x10FFFF, std::little_endian> cvt4;
2205390840fbSDimitrij Mijoski test_utf16_utf32_cvt(cvt4, utf16_little_endian);
2206390840fbSDimitrij Mijoski #endif
2207390840fbSDimitrij Mijoski }
2208390840fbSDimitrij Mijoski
test_utf16_ucs2_codecvts()2209390840fbSDimitrij Mijoski void test_utf16_ucs2_codecvts() {
2210390840fbSDimitrij Mijoski codecvt_utf16<char16_t> cvt;
2211390840fbSDimitrij Mijoski test_utf16_ucs2_cvt(cvt, utf16_big_endian);
2212390840fbSDimitrij Mijoski
2213390840fbSDimitrij Mijoski codecvt_utf16<char16_t, 0x10FFFF, std::little_endian> cvt2;
2214390840fbSDimitrij Mijoski test_utf16_ucs2_cvt(cvt2, utf16_little_endian);
2215390840fbSDimitrij Mijoski
2216390840fbSDimitrij Mijoski #if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && defined(TEST_SHORT_WCHAR)
2217390840fbSDimitrij Mijoski codecvt_utf16<wchar_t> cvt3;
2218390840fbSDimitrij Mijoski test_utf16_ucs2_cvt(cvt3, utf16_big_endian);
2219390840fbSDimitrij Mijoski
2220390840fbSDimitrij Mijoski codecvt_utf16<wchar_t, 0x10FFFF, std::little_endian> cvt4;
2221390840fbSDimitrij Mijoski test_utf16_ucs2_cvt(cvt4, utf16_little_endian);
2222390840fbSDimitrij Mijoski #endif
2223390840fbSDimitrij Mijoski }
2224390840fbSDimitrij Mijoski
main()2225390840fbSDimitrij Mijoski int main() {
2226390840fbSDimitrij Mijoski test_utf8_utf32_codecvts();
2227390840fbSDimitrij Mijoski test_utf8_utf16_codecvts();
2228390840fbSDimitrij Mijoski test_utf8_ucs2_codecvts();
2229390840fbSDimitrij Mijoski test_utf16_utf32_codecvts();
2230390840fbSDimitrij Mijoski test_utf16_ucs2_codecvts();
2231390840fbSDimitrij Mijoski }
2232