xref: /netbsd-src/tests/lib/libc/locale/t_c8rtomb.c (revision a35ceff4b39ccce6de8bd3b28adf00e694090abc)
1 /*	$NetBSD: t_c8rtomb.c,v 1.7 2024/08/19 16:22:10 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2002 Tim J. Robbins
5  * All rights reserved.
6  *
7  * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 /*
32  * Test program for c8rtomb() as specified by C23.
33  */
34 
35 #include <sys/cdefs.h>
36 __RCSID("$NetBSD: t_c8rtomb.c,v 1.7 2024/08/19 16:22:10 riastradh Exp $");
37 
38 #include <errno.h>
39 #include <limits.h>
40 #include <locale.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <uchar.h>
44 
45 #include <atf-c.h>
46 
/*
 * require_lc_ctype(locale_name)
 *
 *	Select locale_name for the LC_CTYPE category.  The current test
 *	case is failed if setlocale() returns NULL, and the name that
 *	setlocale() reports back is required to match locale_name
 *	exactly.
 */
static void
require_lc_ctype(const char *locale_name)
{
	const char *lc_ctype_set = setlocale(LC_CTYPE, locale_name);

	if (lc_ctype_set == NULL) {
		atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
		    locale_name, errno);
	}

	/* The locale actually installed must be the one asked for. */
	ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
	    "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name);
}
60 
/*
 * Conversion state and output buffer shared by the test cases below.
 * Each scenario clears s and refills buf with 0xcc before use, so any
 * byte that c8rtomb() did not write can be recognized afterwards.
 */
static mbstate_t s;
static char buf[(7 * MB_LEN_MAX) + 1];
63 
64 ATF_TC_WITHOUT_HEAD(c8rtomb_c_locale_test);
65 ATF_TC_BODY(c8rtomb_c_locale_test, tc)
66 {
67 	size_t n;
68 
69 	require_lc_ctype("C");
70 
71 	/*
72 	 * If the buffer argument is NULL, c8 is implicitly 0,
73 	 * c8rtomb() resets its internal state.
74 	 */
75 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n);
76 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0x80, NULL)), 1, "n=%zu", n);
77 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xc0, NULL)), 1, "n=%zu", n);
78 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xe0, NULL)), 1, "n=%zu", n);
79 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf0, NULL)), 1, "n=%zu", n);
80 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf8, NULL)), 1, "n=%zu", n);
81 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfc, NULL)), 1, "n=%zu", n);
82 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfe, NULL)), 1, "n=%zu", n);
83 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xff, NULL)), 1, "n=%zu", n);
84 
85 	/* Null wide character. */
86 	memset(&s, 0, sizeof(s));
87 	memset(buf, 0xcc, sizeof(buf));
88 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0, &s)), 1, "n=%zu", n);
89 	ATF_CHECK_MSG(((unsigned char)buf[0] == 0 &&
90 		(unsigned char)buf[1] == 0xcc),
91 	    "buf=[%02x %02x]", buf[0], buf[1]);
92 
93 	/* Latin letter A, internal state. */
94 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n);
95 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 'A', NULL)), 1, "n=%zu", n);
96 
97 	/* Latin letter A. */
98 	memset(&s, 0, sizeof(s));
99 	memset(buf, 0xcc, sizeof(buf));
100 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 'A', &s)), 1, "n=%zu", n);
101 	ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' &&
102 		(unsigned char)buf[1] == 0xcc),
103 	    "buf=[%02x %02x]", buf[0], buf[1]);
104 
105 	/* Unicode character 'Pile of poo'. */
106 	memset(&s, 0, sizeof(s));
107 	memset(buf, 0xcc, sizeof(buf));
108 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
109 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
110 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n);
111 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xa9, &s)), (size_t)-1,
112 	    "n=%zu", n);
113 	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
114 	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
115 
116 	/* Incomplete Unicode character 'Pile of poo', interrupted by NUL. */
117 	memset(&s, 0, sizeof(s));
118 	memset(buf, 0xcc, sizeof(buf));
119 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
120 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n);
121 	ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
122 		(unsigned char)buf[1] == 0xcc),
123 	    "buf=[%02x %02x]", buf[0], buf[1]);
124 
125 	memset(&s, 0, sizeof(s));
126 	memset(buf, 0xcc, sizeof(buf));
127 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
128 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
129 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n);
130 	ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
131 		(unsigned char)buf[1] == 0xcc),
132 	    "buf=[%02x %02x]", buf[0], buf[1]);
133 
134 	memset(&s, 0, sizeof(s));
135 	memset(buf, 0xcc, sizeof(buf));
136 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
137 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
138 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n);
139 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n);
140 	ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
141 		(unsigned char)buf[1] == 0xcc),
142 	    "buf=[%02x %02x]", buf[0], buf[1]);
143 }
144 
145 ATF_TC_WITHOUT_HEAD(c8rtomb_iso2022jp_locale_test);
146 ATF_TC_BODY(c8rtomb_iso2022jp_locale_test, tc)
147 {
148 	char *p;
149 	size_t n;
150 
151 	require_lc_ctype("ja_JP.ISO-2022-JP");
152 
153 	/*
154 	 * If the buffer argument is NULL, c8 is implicitly 0,
155 	 * c8rtomb() resets its internal state.
156 	 */
157 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n);
158 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0x80, NULL)), 1, "n=%zu", n);
159 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xc0, NULL)), 1, "n=%zu", n);
160 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xe0, NULL)), 1, "n=%zu", n);
161 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf0, NULL)), 1, "n=%zu", n);
162 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf8, NULL)), 1, "n=%zu", n);
163 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfc, NULL)), 1, "n=%zu", n);
164 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfe, NULL)), 1, "n=%zu", n);
165 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xff, NULL)), 1, "n=%zu", n);
166 
167 	/* Null wide character. */
168 	memset(&s, 0, sizeof(s));
169 	memset(buf, 0xcc, sizeof(buf));
170 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0, &s)), 1, "n=%zu", n);
171 	ATF_CHECK_MSG(((unsigned char)buf[0] == 0 &&
172 		(unsigned char)buf[1] == 0xcc),
173 	    "buf=[%02x %02x]", buf[0], buf[1]);
174 
175 	/* Latin letter A, internal state. */
176 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n);
177 	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 'A', NULL)), 1, "n=%zu", n);
178 
179 	/*
180 	 * 1. U+0042 LATIN CAPITAL LETTER A
181 	 * 2. U+00A5 YEN SIGN
182 	 * 3. U+00A5 YEN SIGN (again, no shift needed)
183 	 * 4. U+30A2 KATAKANA LETTER A
184 	 * 5. U+30A2 KATAKANA LETTER A (again, no shift needed)
185 	 * 6. incomplete UTF-8 multibyte sequence -- no output
186 	 * 7. U+0000 NUL (plus shift sequence to initial state)
187 	 */
188 	memset(&s, 0, sizeof(s));
189 	memset(buf, 0xcc, sizeof(buf));
190 	p = buf;
191 	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 'A', &s)), 1, "n=%zu", n); /* 1 */
192 	p += 1;
193 	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xc2, &s)), 0, "n=%zu", n); /* 2 */
194 	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa5, &s)), 4, "n=%zu", n);
195 	p += 4;
196 	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xc2, &s)), 0, "n=%zu", n); /* 3 */
197 	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa5, &s)), 1, "n=%zu", n);
198 	p += 1;
199 	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 4 */
200 	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n);
201 	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa2, &s)), 5, "n=%zu", n);
202 	p += 5;
203 	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 5 */
204 	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n);
205 	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa2, &s)), 2, "n=%zu", n);
206 	p += 2;
207 	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 6 */
208 	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n);
209 	ATF_CHECK_EQ_MSG((n = c8rtomb(p, '\0', &s)), 4, "n=%zu", n); /* 7 */
210 	p += 4;
211 	ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' &&
212 		(unsigned char)buf[1] == 0x1b && /* shift ISO/IEC 646:JP */
213 		(unsigned char)buf[2] == '(' &&
214 		(unsigned char)buf[3] == 'J' &&
215 		(unsigned char)buf[4] == 0x5c && /* YEN SIGN */
216 		(unsigned char)buf[5] == 0x5c && /* YEN SIGN */
217 		(unsigned char)buf[6] == 0x1b && /* shift JIS X 0208 */
218 		(unsigned char)buf[7] == '$' &&
219 		(unsigned char)buf[8] == 'B' &&
220 		(unsigned char)buf[9] == 0x25 && /* KATAKANA LETTER A */
221 		(unsigned char)buf[10] == 0x22 &&
222 		(unsigned char)buf[11] == 0x25 && /* KATAKANA LETTER A */
223 		(unsigned char)buf[12] == 0x22 &&
224 		(unsigned char)buf[13] == 0x1b && /* shift US-ASCII */
225 		(unsigned char)buf[14] == '(' &&
226 		(unsigned char)buf[15] == 'B' &&
227 		(unsigned char)buf[16] == '\0' &&
228 		(unsigned char)buf[17] == 0xcc),
229 	    "buf=[%02x %02x %02x %02x  %02x %02x %02x %02x "
230 	    " %02x %02x %02x %02x  %02x %02x %02x %02x "
231 	    " %02x %02x]",
232 	    buf[0], buf[1], buf[2], buf[3],
233 	    buf[4], buf[5], buf[6], buf[7],
234 	    buf[8], buf[9], buf[10], buf[11],
235 	    buf[12], buf[13], buf[14], buf[15],
236 	    buf[16], buf[17]);
237 }
238 
239 ATF_TC_WITHOUT_HEAD(c8rtomb_iso_8859_1_test);
240 ATF_TC_BODY(c8rtomb_iso_8859_1_test, tc)
241 {
242 	size_t n;
243 
244 	require_lc_ctype("en_US.ISO8859-1");
245 
246 	/* Unicode character 'Euro sign'. */
247 	memset(&s, 0, sizeof(s));
248 	memset(buf, 0xcc, sizeof(buf));
249 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xe2, &s)), 0, "n=%zu", n);
250 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x82, &s)), 0, "n=%zu", n);
251 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xac, &s)), (size_t)-1,
252 	    "n=%zu", n);
253 	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
254 	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
255 }
256 
257 ATF_TC_WITHOUT_HEAD(c8rtomb_iso_8859_15_test);
258 ATF_TC_BODY(c8rtomb_iso_8859_15_test, tc)
259 {
260 	size_t n;
261 
262 	require_lc_ctype("en_US.ISO8859-15");
263 
264 	/* Unicode character 'Euro sign'. */
265 	memset(&s, 0, sizeof(s));
266 	memset(buf, 0xcc, sizeof(buf));
267 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xe2, &s)), 0, "n=%zu", n);
268 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x82, &s)), 0, "n=%zu", n);
269 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xac, &s)), 1, "n=%zu", n);
270 	ATF_CHECK_MSG(((unsigned char)buf[0] == 0xa4 &&
271 		(unsigned char)buf[1] == 0xcc),
272 	    "buf=[%02x %02x]", buf[0], buf[1]);
273 }
274 
275 ATF_TC_WITHOUT_HEAD(c8rtomb_utf_8_test);
276 ATF_TC_BODY(c8rtomb_utf_8_test, tc)
277 {
278 	size_t n;
279 
280 	require_lc_ctype("en_US.UTF-8");
281 
282 	/* Unicode character 'Pile of poo'. */
283 	memset(&s, 0, sizeof(s));
284 	memset(buf, 0xcc, sizeof(buf));
285 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
286 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
287 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n);
288 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xa9, &s)), 4, "n=%zu", n);
289 	ATF_CHECK_MSG(((unsigned char)buf[0] == 0xf0 &&
290 		(unsigned char)buf[1] == 0x9f &&
291 		(unsigned char)buf[2] == 0x92 &&
292 		(unsigned char)buf[3] == 0xa9 &&
293 		(unsigned char)buf[4] == 0xcc),
294 	    "buf=[%02x %02x %02x %02x %02x]",
295 	    buf[0], buf[1], buf[2], buf[3], buf[4]);
296 
297 	/* Invalid code; 'Pile of poo' without the last byte. */
298 	memset(&s, 0, sizeof(s));
299 	memset(buf, 0xcc, sizeof(buf));
300 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
301 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
302 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n);
303 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 'A', &s)), (size_t)-1,
304 	    "n=%zu", n);
305 	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
306 	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
307 
308 	/* Invalid code; 'Pile of poo' without the first byte. */
309 	memset(&s, 0, sizeof(s));
310 	memset(buf, 0xcc, sizeof(buf));
311 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), (size_t)-1,
312 	    "n=%zu", n);
313 	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
314 	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
315 
316 	/* Incomplete Unicode character 'Pile of poo', interrupted by NUL. */
317 	memset(&s, 0, sizeof(s));
318 	memset(buf, 0xcc, sizeof(buf));
319 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
320 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n);
321 	ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
322 		(unsigned char)buf[1] == 0xcc),
323 	    "buf=[%02x %02x]", buf[0], buf[1]);
324 
325 	memset(&s, 0, sizeof(s));
326 	memset(buf, 0xcc, sizeof(buf));
327 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
328 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
329 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n);
330 	ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
331 		(unsigned char)buf[1] == 0xcc),
332 	    "buf=[%02x %02x]", buf[0], buf[1]);
333 
334 	memset(&s, 0, sizeof(s));
335 	memset(buf, 0xcc, sizeof(buf));
336 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
337 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
338 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n);
339 	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n);
340 	ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
341 		(unsigned char)buf[1] == 0xcc),
342 	    "buf=[%02x %02x]", buf[0], buf[1]);
343 }
344 
345 ATF_TP_ADD_TCS(tp)
346 {
347 
348 	ATF_TP_ADD_TC(tp, c8rtomb_c_locale_test);
349 	ATF_TP_ADD_TC(tp, c8rtomb_iso2022jp_locale_test);
350 	ATF_TP_ADD_TC(tp, c8rtomb_iso_8859_1_test);
351 	ATF_TP_ADD_TC(tp, c8rtomb_iso_8859_15_test);
352 	ATF_TP_ADD_TC(tp, c8rtomb_utf_8_test);
353 
354 	return (atf_no_error());
355 }
356