xref: /netbsd-src/tests/lib/libc/locale/t_c16rtomb.c (revision a35ceff4b39ccce6de8bd3b28adf00e694090abc)
1 /*	$NetBSD: t_c16rtomb.c,v 1.6 2024/08/19 16:22:10 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2002 Tim J. Robbins
5  * All rights reserved.
6  *
7  * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 /*
32  * Test program for c16rtomb() as specified by ISO/IEC 9899:2011.
33  */
34 
35 #include <sys/cdefs.h>
36 __RCSID("$NetBSD: t_c16rtomb.c,v 1.6 2024/08/19 16:22:10 riastradh Exp $");
37 
38 #include <errno.h>
39 #include <limits.h>
40 #include <locale.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <uchar.h>
44 
45 #include <atf-c.h>
46 
/*
 * Switch LC_CTYPE to locale_name, or abort the current test case.
 *
 * The test case is failed if setlocale() returns NULL, and a
 * requirement failure is raised if the locale name it reports differs
 * from the one requested.
 */
static void
require_lc_ctype(const char *locale_name)
{
	const char *const actual = setlocale(LC_CTYPE, locale_name);

	if (actual == NULL) {
		atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
		    locale_name, errno);
	}

	/* The locale actually installed must be exactly the one asked for. */
	ATF_REQUIRE_EQ_MSG(strcmp(actual, locale_name), 0,
	    "lc_ctype_set=%s locale_name=%s", actual, locale_name);
}
60 
/*
 * Scratch objects shared by every test case in this file.
 *
 * buf holds seven maximum-length multibyte characters plus one extra
 * byte; s is the explicit conversion state handed to c16rtomb().
 * Each test zeroes s and fills buf with 0xcc before using them.
 */
static char buf[(7 * MB_LEN_MAX) + 1];
static mbstate_t s;
63 
64 ATF_TC_WITHOUT_HEAD(c16rtomb_c_locale_test);
65 ATF_TC_BODY(c16rtomb_c_locale_test, tc)
66 {
67 	size_t n;
68 
69 	require_lc_ctype("C");
70 
71 	/*
72 	 * If the buffer argument is NULL, c16 is implicitly 0,
73 	 * c16rtomb() resets its internal state.
74 	 */
75 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
76 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, 0xdc00, NULL)), 1, "n=%zu", n);
77 
78 	/* Null wide character. */
79 	memset(&s, 0, sizeof(s));
80 	memset(buf, 0xcc, sizeof(buf));
81 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0, &s)), 1, "n=%zu", n);
82 	ATF_CHECK_MSG(((unsigned char)buf[0] == 0 &&
83 		(unsigned char)buf[1] == 0xcc),
84 	    "buf=[%02x %02x]", buf[0], buf[1]);
85 
86 	/* Latin letter A, internal state. */
87 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
88 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'A', NULL)), 1, "n=%zu", n);
89 
90 	/* Latin letter A. */
91 	memset(&s, 0, sizeof(s));
92 	memset(buf, 0xcc, sizeof(buf));
93 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'A', &s)), 1, "n=%zu", n);
94 	ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' &&
95 		(unsigned char)buf[1] == 0xcc),
96 	    "buf=[%02x %02x]", buf[0], buf[1]);
97 
98 	/* Unicode character 'Pile of poo'. */
99 	memset(&s, 0, sizeof(s));
100 	memset(buf, 0xcc, sizeof(buf));
101 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
102 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), (size_t)-1,
103 	    "n=%zu", n);
104 	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
105 	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
106 
107 	/* Incomplete Unicode character 'Pile of poo', interrupted by NUL. */
108 	memset(&s, 0, sizeof(s));
109 	memset(buf, 0xcc, sizeof(buf));
110 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
111 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'\0', &s)), 1, "n=%zu", n);
112 	ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
113 		(unsigned char)buf[1] == 0xcc),
114 	    "buf=[%02x %02x]", buf[0], buf[1]);
115 }
116 
117 ATF_TC_WITHOUT_HEAD(c16rtomb_iso2022jp_locale_test);
118 ATF_TC_BODY(c16rtomb_iso2022jp_locale_test, tc)
119 {
120 	char *p;
121 	size_t n;
122 
123 	require_lc_ctype("ja_JP.ISO-2022-JP");
124 
125 	/*
126 	 * If the buffer argument is NULL, c16 is implicitly 0,
127 	 * c16rtomb() resets its internal state.
128 	 */
129 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
130 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, 0xdc00, NULL)), 1, "n=%zu", n);
131 
132 	/* Null wide character. */
133 	memset(&s, 0, sizeof(s));
134 	memset(buf, 0xcc, sizeof(buf));
135 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0, &s)), 1, "n=%zu", n);
136 	ATF_CHECK_MSG(((unsigned char)buf[0] == 0 &&
137 		(unsigned char)buf[1] == 0xcc),
138 	    "buf=[%02x %02x]", buf[0], buf[1]);
139 
140 	/* Latin letter A, internal state. */
141 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
142 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'A', NULL)), 1, "n=%zu", n);
143 
144 	/*
145 	 * 1. U+0042 LATIN CAPITAL LETTER A
146 	 * 2. U+00A5 YEN SIGN
147 	 * 3. U+00A5 YEN SIGN (again, no shift needed)
148 	 * 4. U+30A2 KATAKANA LETTER A
149 	 * 5. U+30A2 KATAKANA LETTER A (again, no shift needed)
150 	 * 6. incomplete UTF-16 surrogate pair -- no output
151 	 * 7. U+0000 NUL (plus shift sequence to initial state)
152 	 */
153 	memset(&s, 0, sizeof(s));
154 	memset(buf, 0xcc, sizeof(buf));
155 	p = buf;
156 	ATF_CHECK_EQ_MSG((n = c16rtomb(p, L'A', &s)), 1, "n=%zu", n); /* 1 */
157 	p += 1;
158 	ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xa5, &s)), 4, "n=%zu", n); /* 2 */
159 	p += 4;
160 	ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xa5, &s)), 1, "n=%zu", n); /* 3 */
161 	p += 1;
162 	ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0x30a2, &s)), 5, "n=%zu", n); /* 4 */
163 	p += 5;
164 	ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0x30a2, &s)), 2, "n=%zu", n); /* 5 */
165 	p += 2;
166 	ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xd800, &s)), 0, "n=%zu", n); /* 6 */
167 	ATF_CHECK_EQ_MSG((n = c16rtomb(p, L'\0', &s)), 4, "n=%zu", n); /* 7 */
168 	p += 4;
169 	ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' &&
170 		(unsigned char)buf[1] == 0x1b && /* shift ISO/IEC 646:JP */
171 		(unsigned char)buf[2] == '(' &&
172 		(unsigned char)buf[3] == 'J' &&
173 		(unsigned char)buf[4] == 0x5c && /* YEN SIGN */
174 		(unsigned char)buf[5] == 0x5c && /* YEN SIGN */
175 		(unsigned char)buf[6] == 0x1b && /* shift JIS X 0208 */
176 		(unsigned char)buf[7] == '$' &&
177 		(unsigned char)buf[8] == 'B' &&
178 		(unsigned char)buf[9] == 0x25 && /* KATAKANA LETTER A */
179 		(unsigned char)buf[10] == 0x22 &&
180 		(unsigned char)buf[11] == 0x25 && /* KATAKANA LETTER A */
181 		(unsigned char)buf[12] == 0x22 &&
182 		(unsigned char)buf[13] == 0x1b && /* shift US-ASCII */
183 		(unsigned char)buf[14] == '(' &&
184 		(unsigned char)buf[15] == 'B' &&
185 		(unsigned char)buf[16] == '\0' &&
186 		(unsigned char)buf[17] == 0xcc),
187 	    "buf=[%02x %02x %02x %02x  %02x %02x %02x %02x "
188 	    " %02x %02x %02x %02x  %02x %02x %02x %02x "
189 	    " %02x %02x]",
190 	    buf[0], buf[1], buf[2], buf[3],
191 	    buf[4], buf[5], buf[6], buf[7],
192 	    buf[8], buf[9], buf[10], buf[11],
193 	    buf[12], buf[13], buf[14], buf[15],
194 	    buf[16], buf[17]);
195 }
196 
197 ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_1_test);
198 ATF_TC_BODY(c16rtomb_iso_8859_1_test, tc)
199 {
200 	size_t n;
201 
202 	require_lc_ctype("en_US.ISO8859-1");
203 
204 	/* Unicode character 'Euro sign'. */
205 	memset(&s, 0, sizeof(s));
206 	memset(buf, 0xcc, sizeof(buf));
207 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0x20ac, &s)), (size_t)-1,
208 	    "n=%zu", n);
209 	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
210 	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
211 }
212 
213 ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_15_test);
214 ATF_TC_BODY(c16rtomb_iso_8859_15_test, tc)
215 {
216 	size_t n;
217 
218 	require_lc_ctype("en_US.ISO8859-15");
219 
220 	/* Unicode character 'Euro sign'. */
221 	memset(&s, 0, sizeof(s));
222 	memset(buf, 0xcc, sizeof(buf));
223 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0x20ac, &s)), 1, "n=%zu", n);
224 	ATF_CHECK_MSG(((unsigned char)buf[0] == 0xa4 &&
225 		(unsigned char)buf[1] == 0xcc),
226 	    "buf=[%02x %02x]", buf[0], buf[1]);
227 }
228 
229 ATF_TC_WITHOUT_HEAD(c16rtomb_utf_8_test);
230 ATF_TC_BODY(c16rtomb_utf_8_test, tc)
231 {
232 	size_t n;
233 
234 	require_lc_ctype("en_US.UTF-8");
235 
236 	/* Unicode character 'Pile of poo'. */
237 	memset(&s, 0, sizeof(s));
238 	memset(buf, 0xcc, sizeof(buf));
239 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
240 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), 4, "n=%zu", n);
241 	ATF_CHECK_MSG(((unsigned char)buf[0] == 0xf0 &&
242 		(unsigned char)buf[1] == 0x9f &&
243 		(unsigned char)buf[2] == 0x92 &&
244 		(unsigned char)buf[3] == 0xa9 &&
245 		(unsigned char)buf[4] == 0xcc),
246 	    "buf=[%02x %02x %02x %02x %02x]",
247 	    buf[0], buf[1], buf[2], buf[3], buf[4]);
248 
249 	/* Invalid code; 'Pile of poo' without the trail surrogate. */
250 	memset(&s, 0, sizeof(s));
251 	memset(buf, 0xcc, sizeof(buf));
252 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
253 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'A', &s)), (size_t)-1,
254 	    "n=%zu", n);
255 	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
256 	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
257 
258 	/* Invalid code; 'Pile of poo' without the lead surrogate. */
259 	memset(&s, 0, sizeof(s));
260 	memset(buf, 0xcc, sizeof(buf));
261 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), (size_t)-1,
262 	    "n=%zu", n);
263 	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
264 	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
265 
266 	/* Incomplete Unicode character 'Pile of poo', interrupted by NUL. */
267 	memset(&s, 0, sizeof(s));
268 	memset(buf, 0xcc, sizeof(buf));
269 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
270 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'\0', &s)), 1,
271 	    "n=%zu", n);
272 	ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
273 		(unsigned char)buf[1] == 0xcc),
274 	    "buf=[%02x %02x]", buf[0], buf[1]);
275 }
276 
277 ATF_TP_ADD_TCS(tp)
278 {
279 
280 	ATF_TP_ADD_TC(tp, c16rtomb_c_locale_test);
281 	ATF_TP_ADD_TC(tp, c16rtomb_iso2022jp_locale_test);
282 	ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_1_test);
283 	ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_15_test);
284 	ATF_TP_ADD_TC(tp, c16rtomb_utf_8_test);
285 
286 	return (atf_no_error());
287 }
288