xref: /netbsd-src/tests/lib/libc/locale/t_mbrtoc16.c (revision f8cf1a9151c7af1cb0bd8b09c13c66bca599c027)
1 /*	$NetBSD: t_mbrtoc16.c,v 1.3 2024/08/20 17:43:09 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2002 Tim J. Robbins
5  * All rights reserved.
6  *
7  * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 /*
32  * Test program for mbrtoc16() as specified by ISO/IEC 9899:2011.
33  */
34 
35 #include <sys/cdefs.h>
36 __RCSID("$NetBSD: t_mbrtoc16.c,v 1.3 2024/08/20 17:43:09 riastradh Exp $");
37 
38 #include <errno.h>
39 #include <inttypes.h>
40 #include <limits.h>
41 #include <locale.h>
42 #include <string.h>
43 #include <uchar.h>
44 
45 #include <atf-c.h>
46 
/*
 * require_lc_ctype(locale_name)
 *
 *	Select locale_name as the LC_CTYPE locale for the calling test
 *	case.  If setlocale() returns NULL, the failure is reported
 *	through atf_tc_fail() together with the current errno.
 *	Otherwise the name handed back by setlocale() is required to be
 *	string-equal to locale_name.
 */
static void
require_lc_ctype(const char *locale_name)
{
	char *lc_ctype_set = setlocale(LC_CTYPE, locale_name);

	if (lc_ctype_set == NULL) {
		atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
		    locale_name, errno);
	}

	/* Make sure we got the locale we asked for, not a substitute. */
	ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
	    "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name);
}
60 
/*
 * Conversion state handed explicitly to mbrtoc16(); the test cases zero
 * it with memset() before starting a fresh conversion.
 */
static mbstate_t s;
/* Destination for the char16_t value stored by mbrtoc16(). */
static char16_t c16;
63 
64 ATF_TC_WITHOUT_HEAD(mbrtoc16_c_locale_test);
65 ATF_TC_BODY(mbrtoc16_c_locale_test, tc)
66 {
67 	size_t n;
68 
69 	require_lc_ctype("C");
70 
71 	/* Null wide character, internal state. */
72 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n);
73 	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
74 
75 	/* Null wide character. */
76 	memset(&s, 0, sizeof(s));
77 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n);
78 	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
79 
80 	/* Latin letter A, internal state. */
81 	ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
82 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n);
83 	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
84 	    (uint16_t)c16, (uint16_t)L'A');
85 
86 	/* Latin letter A. */
87 	memset(&s, 0, sizeof(s));
88 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n);
89 	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
90 	    (uint16_t)c16, (uint16_t)L'A');
91 
92 	/* Incomplete character sequence. */
93 	c16 = L'z';
94 	memset(&s, 0, sizeof(s));
95 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
96 	    "n=%zu", n);
97 	ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
98 	    (uint16_t)c16, (uint16_t)L'z');
99 
100 	/* Check that mbrtoc16() doesn't access the buffer when n == 0. */
101 	c16 = L'z';
102 	memset(&s, 0, sizeof(s));
103 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
104 	    "n=%zu", n);
105 	ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
106 	    (uint16_t)c16, (uint16_t)L'z');
107 
108 	/* Check that mbrtoc16() doesn't read ahead too aggressively. */
109 	memset(&s, 0, sizeof(s));
110 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "AB", 2, &s)), 1, "n=%zu", n);
111 	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
112 	    (uint16_t)c16, (uint16_t)L'A');
113 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "C", 1, &s)), 1, "n=%zu", n);
114 	ATF_CHECK_EQ_MSG(c16, L'C', "c16=U+%"PRIx16" L'C'=U+%"PRIx16,
115 	    (uint16_t)c16, (uint16_t)L'C');
116 }
117 
118 ATF_TC_WITHOUT_HEAD(mbrtoc16_iso2022jp_locale_test);
119 ATF_TC_BODY(mbrtoc16_iso2022jp_locale_test, tc)
120 {
121 	size_t n;
122 
123 	require_lc_ctype("ja_JP.ISO-2022-JP");
124 
125 	/* Null wide character, internal state. */
126 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n);
127 	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
128 
129 	/* Null wide character. */
130 	memset(&s, 0, sizeof(s));
131 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n);
132 	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
133 
134 	/* Latin letter A, internal state. */
135 	ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
136 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n);
137 	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16" L'A'=U+%04"PRIx16,
138 	    (uint16_t)c16, (uint16_t)L'A');
139 
140 	/* Latin letter A. */
141 	memset(&s, 0, sizeof(s));
142 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n);
143 	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16" L'A'=U+%04"PRIx16,
144 	    (uint16_t)c16, (uint16_t)L'A');
145 
146 	/* Incomplete character sequence. */
147 	c16 = L'z';
148 	memset(&s, 0, sizeof(s));
149 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
150 	    "n=%zu", n);
151 	ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%04"PRIx16" L'z'=U+%04"PRIx16,
152 	    (uint16_t)c16, (uint16_t)L'z');
153 
154 	/* Check that mbrtoc16() doesn't access the buffer when n == 0. */
155 	c16 = L'z';
156 	memset(&s, 0, sizeof(s));
157 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
158 	    "n=%zu", n);
159 	ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%04"PRIx16" L'z'=U+%04"PRIx16,
160 	    (uint16_t)c16, (uint16_t)L'z');
161 
162 	/* Check that mbrtoc16() doesn't read ahead too aggressively. */
163 	memset(&s, 0, sizeof(s));
164 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "AB", 2, &s)), 1, "n=%zu", n);
165 	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16" L'A'=U+%04"PRIx16,
166 	    (uint16_t)c16, (uint16_t)L'A');
167 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "C", 1, &s)), 1, "n=%zu", n);
168 	ATF_CHECK_EQ_MSG(c16, L'C', "c16=U+%04"PRIx16" L'C'=U+%04"PRIx16,
169 	    (uint16_t)c16, (uint16_t)L'C');
170 
171 	/* Incomplete character sequence (shift sequence only). */
172 	memset(&s, 0, sizeof(s));
173 	c16 = 0;
174 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(J", 3, &s)), (size_t)-2,
175 	    "n=%zu", n);
176 	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
177 
178 	/* Same as above, but complete (U+00A5 YEN SIGN). */
179 	memset(&s, 0, sizeof(s));
180 	c16 = 0;
181 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(J\x5c", 4, &s)), 4,
182 	    "n=%zu", n);
183 	ATF_CHECK_EQ_MSG(c16, 0xa5, "c16=U+%04"PRIx16, (uint16_t)c16);
184 
185 	/* Test restarting behaviour. */
186 	memset(&s, 0, sizeof(s));
187 	c16 = 0;
188 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(", 2, &s)), (size_t)-2,
189 	    "n=%zu", n);
190 	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
191 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "J\x5c", 2, &s)), 2, "n=%zu", n);
192 	ATF_CHECK_EQ_MSG(c16, 0xa5, "c16=U+%04"PRIx16, (uint16_t)c16);
193 
194 	/*
195 	 * Test shift sequence state in various increments:
196 	 * 1. U+0042 LATIN CAPITAL LETTER A
197 	 * 2. (shift ISO/IEC 646:JP) U+00A5 YEN SIGN
198 	 * 3. U+00A5 YEN SIGN
199 	 * 4. (shift JIS X 0208) U+30A2 KATAKANA LETTER A
200 	 * 5. U+30A2 KATAKANA LETTER A
201 	 * 6. (shift to initial state) U+0000 NUL
202 	 */
203 	memset(&s, 0, sizeof(s));
204 	c16 = 0;
205 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A\x1b(J", 4, &s)), 1,
206 	    "n=%zu", n);
207 	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16, (uint16_t)c16);
208 	c16 = 0;
209 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(J", 3, &s)), (size_t)-2,
210 	    "n=%zu", n);
211 	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
212 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x5c\x5c", 2, &s)), 1,
213 	    "n=%zu", n);
214 	ATF_CHECK_EQ_MSG(c16, 0x00a5, "c16=U+%04"PRIx16, (uint16_t)c16);
215 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x5c\x1b$", 3, &s)), 1,
216 	    "n=%zu", n);
217 	ATF_CHECK_EQ_MSG(c16, 0x00a5, "c16=U+%04"PRIx16, (uint16_t)c16);
218 	c16 = 0x1234;
219 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b", 1, &s)), (size_t)-2,
220 	    "n=%zu", n);
221 	ATF_CHECK_EQ_MSG(c16, 0x1234, "c16=U+%04"PRIx16, (uint16_t)c16);
222 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "$B\x25\x22", 4, &s)), 4,
223 	    "n=%zu", n);
224 	ATF_CHECK_EQ_MSG(c16, 0x30a2, "c16=U+%04"PRIx16, (uint16_t)c16);
225 	c16 = 0;
226 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x25", 1, &s)), (size_t)-2,
227 	    "n=%zu", n);
228 	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
229 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x22\x1b(B\x00", 5, &s)), 1,
230 	    "n=%zu", n);
231 	ATF_CHECK_EQ_MSG(c16, 0x30a2, "c16=U+%04"PRIx16, (uint16_t)c16);
232 	c16 = 0;
233 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(", 2, &s)), (size_t)-2,
234 	    "n=%zu", n);
235 	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
236 	c16 = 42;
237 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "B\x00", 2, &s)), 0, "n=%zu", n);
238 	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
239 }
240 
241 ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_1_test);
242 ATF_TC_BODY(mbrtoc16_iso_8859_1_test, tc)
243 {
244 	size_t n;
245 
246 	require_lc_ctype("en_US.ISO8859-1");
247 
248 	/* Currency sign. */
249 	memset(&s, 0, sizeof(s));
250 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n);
251 	ATF_CHECK_EQ_MSG(c16, 0xa4, "c16=U+%"PRIx16, (uint16_t)c16);
252 }
253 
254 ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_15_test);
255 ATF_TC_BODY(mbrtoc16_iso_8859_15_test, tc)
256 {
257 	size_t n;
258 
259 	require_lc_ctype("en_US.ISO8859-15");
260 
261 	/* Euro sign. */
262 	memset(&s, 0, sizeof(s));
263 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n);
264 	ATF_CHECK_EQ_MSG(c16, 0x20ac, "c16=U+%"PRIx16, (uint16_t)c16);
265 }
266 
267 ATF_TC_WITHOUT_HEAD(mbrtoc16_utf_8_test);
268 ATF_TC_BODY(mbrtoc16_utf_8_test, tc)
269 {
270 	size_t n;
271 
272 	require_lc_ctype("en_US.UTF-8");
273 
274 	/* Null wide character, internal state. */
275 	ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
276 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n);
277 	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
278 
279 	/* Null wide character. */
280 	memset(&s, 0, sizeof(s));
281 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n);
282 	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
283 
284 	/* Latin letter A, internal state. */
285 	ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
286 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n);
287 	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
288 	    (uint16_t)c16, (uint16_t)L'A');
289 
290 	/* Latin letter A. */
291 	memset(&s, 0, sizeof(s));
292 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n);
293 	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
294 	    (uint16_t)c16, (uint16_t)L'A');
295 
296 	/* Incomplete character sequence (zero length). */
297 	c16 = L'z';
298 	memset(&s, 0, sizeof(s));
299 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
300 	    "n=%zu", n);
301 	ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
302 	    (uint16_t)c16, (uint16_t)L'z');
303 
304 	/* Incomplete character sequence (truncated double-byte). */
305 	memset(&s, 0, sizeof(s));
306 	c16 = 0;
307 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2,
308 	    "n=%zu", n);
309 
310 	/* Same as above, but complete. */
311 	memset(&s, 0, sizeof(s));
312 	c16 = 0;
313 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\x84", 2, &s)), 2,
314 	    "n=%zu", n);
315 	ATF_CHECK_EQ_MSG(c16, 0xc4, "c16=U+%"PRIx16, (uint16_t)c16);
316 
317 	/* Test restarting behaviour. */
318 	memset(&s, 0, sizeof(s));
319 	c16 = 0;
320 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2,
321 	    "n=%zu", n);
322 	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
323 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xb7", 1, &s)), 1, "n=%zu", n);
324 	ATF_CHECK_EQ_MSG(c16, 0xf7, "c16=U+%"PRIx16, (uint16_t)c16);
325 
326 	/* Surrogate pair. */
327 	memset(&s, 0, sizeof(s));
328 	c16 = 0;
329 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xf0\x9f\x92\xa9", 4, &s)), 4,
330 	    "n=%zu", n);
331 	ATF_CHECK_EQ_MSG(c16, 0xd83d, "c16=U+%"PRIx16, (uint16_t)c16);
332 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-3,
333 	    "n=%zu", n);
334 	ATF_CHECK_EQ_MSG(c16, 0xdca9, "c16=U+%"PRIx16, (uint16_t)c16);
335 
336 	/* Letter e with acute, precomposed. */
337 	memset(&s, 0, sizeof(s));
338 	c16 = 0;
339 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\xa9", 2, &s)), 2,
340 	    "n=%zu", n);
341 	ATF_CHECK_EQ_MSG(c16, 0xe9, "c16=U+%"PRIx16, (uint16_t)c16);
342 
343 	/* Letter e with acute, combined. */
344 	memset(&s, 0, sizeof(s));
345 	c16 = 0;
346 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x65\xcc\x81", 3, &s)), 1,
347 	    "n=%zu", n);
348 	ATF_CHECK_EQ_MSG(c16, 0x65, "c16=U+%"PRIx16, (uint16_t)c16);
349 	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xcc\x81", 2, &s)), 2,
350 	    "n=%zu", n);
351 	ATF_CHECK_EQ_MSG(c16, 0x301, "c16=U+%"PRIx16, (uint16_t)c16);
352 }
353 
354 ATF_TP_ADD_TCS(tp)
355 {
356 
357 	ATF_TP_ADD_TC(tp, mbrtoc16_c_locale_test);
358 	ATF_TP_ADD_TC(tp, mbrtoc16_iso2022jp_locale_test);
359 	ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_1_test);
360 	ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_15_test);
361 	ATF_TP_ADD_TC(tp, mbrtoc16_utf_8_test);
362 
363 	return (atf_no_error());
364 }
365