1 /* $NetBSD: t_mbrtoc16.c,v 1.3 2024/08/20 17:43:09 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 2002 Tim J. Robbins 5 * All rights reserved. 6 * 7 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 /* 32 * Test program for mbrtoc16() as specified by ISO/IEC 9899:2011. 33 */ 34 35 #include <sys/cdefs.h> 36 __RCSID("$NetBSD: t_mbrtoc16.c,v 1.3 2024/08/20 17:43:09 riastradh Exp $"); 37 38 #include <errno.h> 39 #include <inttypes.h> 40 #include <limits.h> 41 #include <locale.h> 42 #include <string.h> 43 #include <uchar.h> 44 45 #include <atf-c.h> 46 47 static void 48 require_lc_ctype(const char *locale_name) 49 { 50 char *lc_ctype_set; 51 52 lc_ctype_set = setlocale(LC_CTYPE, locale_name); 53 if (lc_ctype_set == NULL) 54 atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d", 55 locale_name, errno); 56 57 ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0, 58 "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name); 59 } 60 61 static mbstate_t s; 62 static char16_t c16; 63 64 ATF_TC_WITHOUT_HEAD(mbrtoc16_c_locale_test); 65 ATF_TC_BODY(mbrtoc16_c_locale_test, tc) 66 { 67 size_t n; 68 69 require_lc_ctype("C"); 70 71 /* Null wide character, internal state. */ 72 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n); 73 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16); 74 75 /* Null wide character. */ 76 memset(&s, 0, sizeof(s)); 77 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n); 78 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16); 79 80 /* Latin letter A, internal state. */ 81 ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n); 82 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n); 83 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16, 84 (uint16_t)c16, (uint16_t)L'A'); 85 86 /* Latin letter A. */ 87 memset(&s, 0, sizeof(s)); 88 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n); 89 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16, 90 (uint16_t)c16, (uint16_t)L'A'); 91 92 /* Incomplete character sequence. */ 93 c16 = L'z'; 94 memset(&s, 0, sizeof(s)); 95 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2, 96 "n=%zu", n); 97 ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16, 98 (uint16_t)c16, (uint16_t)L'z'); 99 100 /* Check that mbrtoc16() doesn't access the buffer when n == 0. */ 101 c16 = L'z'; 102 memset(&s, 0, sizeof(s)); 103 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2, 104 "n=%zu", n); 105 ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16, 106 (uint16_t)c16, (uint16_t)L'z'); 107 108 /* Check that mbrtoc16() doesn't read ahead too aggressively. */ 109 memset(&s, 0, sizeof(s)); 110 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "AB", 2, &s)), 1, "n=%zu", n); 111 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16, 112 (uint16_t)c16, (uint16_t)L'A'); 113 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "C", 1, &s)), 1, "n=%zu", n); 114 ATF_CHECK_EQ_MSG(c16, L'C', "c16=U+%"PRIx16" L'C'=U+%"PRIx16, 115 (uint16_t)c16, (uint16_t)L'C'); 116 } 117 118 ATF_TC_WITHOUT_HEAD(mbrtoc16_iso2022jp_locale_test); 119 ATF_TC_BODY(mbrtoc16_iso2022jp_locale_test, tc) 120 { 121 size_t n; 122 123 require_lc_ctype("ja_JP.ISO-2022-JP"); 124 125 /* Null wide character, internal state. */ 126 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n); 127 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16); 128 129 /* Null wide character. */ 130 memset(&s, 0, sizeof(s)); 131 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n); 132 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16); 133 134 /* Latin letter A, internal state. */ 135 ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n); 136 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n); 137 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16" L'A'=U+%04"PRIx16, 138 (uint16_t)c16, (uint16_t)L'A'); 139 140 /* Latin letter A. */ 141 memset(&s, 0, sizeof(s)); 142 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n); 143 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16" L'A'=U+%04"PRIx16, 144 (uint16_t)c16, (uint16_t)L'A'); 145 146 /* Incomplete character sequence. */ 147 c16 = L'z'; 148 memset(&s, 0, sizeof(s)); 149 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2, 150 "n=%zu", n); 151 ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%04"PRIx16" L'z'=U+%04"PRIx16, 152 (uint16_t)c16, (uint16_t)L'z'); 153 154 /* Check that mbrtoc16() doesn't access the buffer when n == 0. */ 155 c16 = L'z'; 156 memset(&s, 0, sizeof(s)); 157 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2, 158 "n=%zu", n); 159 ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%04"PRIx16" L'z'=U+%04"PRIx16, 160 (uint16_t)c16, (uint16_t)L'z'); 161 162 /* Check that mbrtoc16() doesn't read ahead too aggressively. */ 163 memset(&s, 0, sizeof(s)); 164 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "AB", 2, &s)), 1, "n=%zu", n); 165 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16" L'A'=U+%04"PRIx16, 166 (uint16_t)c16, (uint16_t)L'A'); 167 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "C", 1, &s)), 1, "n=%zu", n); 168 ATF_CHECK_EQ_MSG(c16, L'C', "c16=U+%04"PRIx16" L'C'=U+%04"PRIx16, 169 (uint16_t)c16, (uint16_t)L'C'); 170 171 /* Incomplete character sequence (shift sequence only). */ 172 memset(&s, 0, sizeof(s)); 173 c16 = 0; 174 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(J", 3, &s)), (size_t)-2, 175 "n=%zu", n); 176 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16); 177 178 /* Same as above, but complete (U+00A5 YEN SIGN). */ 179 memset(&s, 0, sizeof(s)); 180 c16 = 0; 181 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(J\x5c", 4, &s)), 4, 182 "n=%zu", n); 183 ATF_CHECK_EQ_MSG(c16, 0xa5, "c16=U+%04"PRIx16, (uint16_t)c16); 184 185 /* Test restarting behaviour. */ 186 memset(&s, 0, sizeof(s)); 187 c16 = 0; 188 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(", 2, &s)), (size_t)-2, 189 "n=%zu", n); 190 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16); 191 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "J\x5c", 2, &s)), 2, "n=%zu", n); 192 ATF_CHECK_EQ_MSG(c16, 0xa5, "c16=U+%04"PRIx16, (uint16_t)c16); 193 194 /* 195 * Test shift sequence state in various increments: 196 * 1. U+0042 LATIN CAPITAL LETTER A 197 * 2. (shift ISO/IEC 646:JP) U+00A5 YEN SIGN 198 * 3. U+00A5 YEN SIGN 199 * 4. (shift JIS X 0208) U+30A2 KATAKANA LETTER A 200 * 5. U+30A2 KATAKANA LETTER A 201 * 6. (shift to initial state) U+0000 NUL 202 */ 203 memset(&s, 0, sizeof(s)); 204 c16 = 0; 205 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A\x1b(J", 4, &s)), 1, 206 "n=%zu", n); 207 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16, (uint16_t)c16); 208 c16 = 0; 209 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(J", 3, &s)), (size_t)-2, 210 "n=%zu", n); 211 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16); 212 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x5c\x5c", 2, &s)), 1, 213 "n=%zu", n); 214 ATF_CHECK_EQ_MSG(c16, 0x00a5, "c16=U+%04"PRIx16, (uint16_t)c16); 215 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x5c\x1b$", 3, &s)), 1, 216 "n=%zu", n); 217 ATF_CHECK_EQ_MSG(c16, 0x00a5, "c16=U+%04"PRIx16, (uint16_t)c16); 218 c16 = 0x1234; 219 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b", 1, &s)), (size_t)-2, 220 "n=%zu", n); 221 ATF_CHECK_EQ_MSG(c16, 0x1234, "c16=U+%04"PRIx16, (uint16_t)c16); 222 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "$B\x25\x22", 4, &s)), 4, 223 "n=%zu", n); 224 ATF_CHECK_EQ_MSG(c16, 0x30a2, "c16=U+%04"PRIx16, (uint16_t)c16); 225 c16 = 0; 226 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x25", 1, &s)), (size_t)-2, 227 "n=%zu", n); 228 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16); 229 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x22\x1b(B\x00", 5, &s)), 1, 230 "n=%zu", n); 231 ATF_CHECK_EQ_MSG(c16, 0x30a2, "c16=U+%04"PRIx16, (uint16_t)c16); 232 c16 = 0; 233 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(", 2, &s)), (size_t)-2, 234 "n=%zu", n); 235 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16); 236 c16 = 42; 237 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "B\x00", 2, &s)), 0, "n=%zu", n); 238 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16); 239 } 240 241 ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_1_test); 242 ATF_TC_BODY(mbrtoc16_iso_8859_1_test, tc) 243 { 244 size_t n; 245 246 require_lc_ctype("en_US.ISO8859-1"); 247 248 /* Currency sign. */ 249 memset(&s, 0, sizeof(s)); 250 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n); 251 ATF_CHECK_EQ_MSG(c16, 0xa4, "c16=U+%"PRIx16, (uint16_t)c16); 252 } 253 254 ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_15_test); 255 ATF_TC_BODY(mbrtoc16_iso_8859_15_test, tc) 256 { 257 size_t n; 258 259 require_lc_ctype("en_US.ISO8859-15"); 260 261 /* Euro sign. */ 262 memset(&s, 0, sizeof(s)); 263 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n); 264 ATF_CHECK_EQ_MSG(c16, 0x20ac, "c16=U+%"PRIx16, (uint16_t)c16); 265 } 266 267 ATF_TC_WITHOUT_HEAD(mbrtoc16_utf_8_test); 268 ATF_TC_BODY(mbrtoc16_utf_8_test, tc) 269 { 270 size_t n; 271 272 require_lc_ctype("en_US.UTF-8"); 273 274 /* Null wide character, internal state. */ 275 ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n); 276 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n); 277 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16); 278 279 /* Null wide character. */ 280 memset(&s, 0, sizeof(s)); 281 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n); 282 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16); 283 284 /* Latin letter A, internal state. */ 285 ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n); 286 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n); 287 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16, 288 (uint16_t)c16, (uint16_t)L'A'); 289 290 /* Latin letter A. */ 291 memset(&s, 0, sizeof(s)); 292 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n); 293 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16, 294 (uint16_t)c16, (uint16_t)L'A'); 295 296 /* Incomplete character sequence (zero length). */ 297 c16 = L'z'; 298 memset(&s, 0, sizeof(s)); 299 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2, 300 "n=%zu", n); 301 ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16, 302 (uint16_t)c16, (uint16_t)L'z'); 303 304 /* Incomplete character sequence (truncated double-byte). */ 305 memset(&s, 0, sizeof(s)); 306 c16 = 0; 307 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2, 308 "n=%zu", n); 309 310 /* Same as above, but complete. */ 311 memset(&s, 0, sizeof(s)); 312 c16 = 0; 313 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\x84", 2, &s)), 2, 314 "n=%zu", n); 315 ATF_CHECK_EQ_MSG(c16, 0xc4, "c16=U+%"PRIx16, (uint16_t)c16); 316 317 /* Test restarting behaviour. */ 318 memset(&s, 0, sizeof(s)); 319 c16 = 0; 320 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2, 321 "n=%zu", n); 322 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16); 323 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xb7", 1, &s)), 1, "n=%zu", n); 324 ATF_CHECK_EQ_MSG(c16, 0xf7, "c16=U+%"PRIx16, (uint16_t)c16); 325 326 /* Surrogate pair. */ 327 memset(&s, 0, sizeof(s)); 328 c16 = 0; 329 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xf0\x9f\x92\xa9", 4, &s)), 4, 330 "n=%zu", n); 331 ATF_CHECK_EQ_MSG(c16, 0xd83d, "c16=U+%"PRIx16, (uint16_t)c16); 332 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-3, 333 "n=%zu", n); 334 ATF_CHECK_EQ_MSG(c16, 0xdca9, "c16=U+%"PRIx16, (uint16_t)c16); 335 336 /* Letter e with acute, precomposed. */ 337 memset(&s, 0, sizeof(s)); 338 c16 = 0; 339 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\xa9", 2, &s)), 2, 340 "n=%zu", n); 341 ATF_CHECK_EQ_MSG(c16, 0xe9, "c16=U+%"PRIx16, (uint16_t)c16); 342 343 /* Letter e with acute, combined. */ 344 memset(&s, 0, sizeof(s)); 345 c16 = 0; 346 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x65\xcc\x81", 3, &s)), 1, 347 "n=%zu", n); 348 ATF_CHECK_EQ_MSG(c16, 0x65, "c16=U+%"PRIx16, (uint16_t)c16); 349 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xcc\x81", 2, &s)), 2, 350 "n=%zu", n); 351 ATF_CHECK_EQ_MSG(c16, 0x301, "c16=U+%"PRIx16, (uint16_t)c16); 352 } 353 354 ATF_TP_ADD_TCS(tp) 355 { 356 357 ATF_TP_ADD_TC(tp, mbrtoc16_c_locale_test); 358 ATF_TP_ADD_TC(tp, mbrtoc16_iso2022jp_locale_test); 359 ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_1_test); 360 ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_15_test); 361 ATF_TP_ADD_TC(tp, mbrtoc16_utf_8_test); 362 363 return (atf_no_error()); 364 } 365