1 /* $NetBSD: t_c8rtomb.c,v 1.7 2024/08/19 16:22:10 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 2002 Tim J. Robbins 5 * All rights reserved. 6 * 7 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 /* 32 * Test program for c8rtomb() as specified by C23. 33 */ 34 35 #include <sys/cdefs.h> 36 __RCSID("$NetBSD: t_c8rtomb.c,v 1.7 2024/08/19 16:22:10 riastradh Exp $"); 37 38 #include <errno.h> 39 #include <limits.h> 40 #include <locale.h> 41 #include <stdio.h> 42 #include <string.h> 43 #include <uchar.h> 44 45 #include <atf-c.h> 46 47 static void 48 require_lc_ctype(const char *locale_name) 49 { 50 char *lc_ctype_set; 51 52 lc_ctype_set = setlocale(LC_CTYPE, locale_name); 53 if (lc_ctype_set == NULL) 54 atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d", 55 locale_name, errno); 56 57 ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0, 58 "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name); 59 } 60 61 static mbstate_t s; 62 static char buf[7*MB_LEN_MAX + 1]; 63 64 ATF_TC_WITHOUT_HEAD(c8rtomb_c_locale_test); 65 ATF_TC_BODY(c8rtomb_c_locale_test, tc) 66 { 67 size_t n; 68 69 require_lc_ctype("C"); 70 71 /* 72 * If the buffer argument is NULL, c8 is implicitly 0, 73 * c8rtomb() resets its internal state. 74 */ 75 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n); 76 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0x80, NULL)), 1, "n=%zu", n); 77 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xc0, NULL)), 1, "n=%zu", n); 78 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xe0, NULL)), 1, "n=%zu", n); 79 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf0, NULL)), 1, "n=%zu", n); 80 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf8, NULL)), 1, "n=%zu", n); 81 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfc, NULL)), 1, "n=%zu", n); 82 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfe, NULL)), 1, "n=%zu", n); 83 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xff, NULL)), 1, "n=%zu", n); 84 85 /* Null wide character. */ 86 memset(&s, 0, sizeof(s)); 87 memset(buf, 0xcc, sizeof(buf)); 88 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0, &s)), 1, "n=%zu", n); 89 ATF_CHECK_MSG(((unsigned char)buf[0] == 0 && 90 (unsigned char)buf[1] == 0xcc), 91 "buf=[%02x %02x]", buf[0], buf[1]); 92 93 /* Latin letter A, internal state. */ 94 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n); 95 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 'A', NULL)), 1, "n=%zu", n); 96 97 /* Latin letter A. */ 98 memset(&s, 0, sizeof(s)); 99 memset(buf, 0xcc, sizeof(buf)); 100 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 'A', &s)), 1, "n=%zu", n); 101 ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' && 102 (unsigned char)buf[1] == 0xcc), 103 "buf=[%02x %02x]", buf[0], buf[1]); 104 105 /* Unicode character 'Pile of poo'. */ 106 memset(&s, 0, sizeof(s)); 107 memset(buf, 0xcc, sizeof(buf)); 108 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n); 109 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n); 110 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n); 111 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xa9, &s)), (size_t)-1, 112 "n=%zu", n); 113 ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno); 114 ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]); 115 116 /* Incomplete Unicode character 'Pile of poo', interrupted by NUL. */ 117 memset(&s, 0, sizeof(s)); 118 memset(buf, 0xcc, sizeof(buf)); 119 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n); 120 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n); 121 ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' && 122 (unsigned char)buf[1] == 0xcc), 123 "buf=[%02x %02x]", buf[0], buf[1]); 124 125 memset(&s, 0, sizeof(s)); 126 memset(buf, 0xcc, sizeof(buf)); 127 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n); 128 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n); 129 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n); 130 ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' && 131 (unsigned char)buf[1] == 0xcc), 132 "buf=[%02x %02x]", buf[0], buf[1]); 133 134 memset(&s, 0, sizeof(s)); 135 memset(buf, 0xcc, sizeof(buf)); 136 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n); 137 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n); 138 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n); 139 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n); 140 ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' && 141 (unsigned char)buf[1] == 0xcc), 142 "buf=[%02x %02x]", buf[0], buf[1]); 143 } 144 145 ATF_TC_WITHOUT_HEAD(c8rtomb_iso2022jp_locale_test); 146 ATF_TC_BODY(c8rtomb_iso2022jp_locale_test, tc) 147 { 148 char *p; 149 size_t n; 150 151 require_lc_ctype("ja_JP.ISO-2022-JP"); 152 153 /* 154 * If the buffer argument is NULL, c8 is implicitly 0, 155 * c8rtomb() resets its internal state. 156 */ 157 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n); 158 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0x80, NULL)), 1, "n=%zu", n); 159 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xc0, NULL)), 1, "n=%zu", n); 160 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xe0, NULL)), 1, "n=%zu", n); 161 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf0, NULL)), 1, "n=%zu", n); 162 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf8, NULL)), 1, "n=%zu", n); 163 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfc, NULL)), 1, "n=%zu", n); 164 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfe, NULL)), 1, "n=%zu", n); 165 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xff, NULL)), 1, "n=%zu", n); 166 167 /* Null wide character. */ 168 memset(&s, 0, sizeof(s)); 169 memset(buf, 0xcc, sizeof(buf)); 170 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0, &s)), 1, "n=%zu", n); 171 ATF_CHECK_MSG(((unsigned char)buf[0] == 0 && 172 (unsigned char)buf[1] == 0xcc), 173 "buf=[%02x %02x]", buf[0], buf[1]); 174 175 /* Latin letter A, internal state. */ 176 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n); 177 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 'A', NULL)), 1, "n=%zu", n); 178 179 /* 180 * 1. U+0042 LATIN CAPITAL LETTER A 181 * 2. U+00A5 YEN SIGN 182 * 3. U+00A5 YEN SIGN (again, no shift needed) 183 * 4. U+30A2 KATAKANA LETTER A 184 * 5. U+30A2 KATAKANA LETTER A (again, no shift needed) 185 * 6. incomplete UTF-8 multibyte sequence -- no output 186 * 7. U+0000 NUL (plus shift sequence to initial state) 187 */ 188 memset(&s, 0, sizeof(s)); 189 memset(buf, 0xcc, sizeof(buf)); 190 p = buf; 191 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 'A', &s)), 1, "n=%zu", n); /* 1 */ 192 p += 1; 193 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xc2, &s)), 0, "n=%zu", n); /* 2 */ 194 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa5, &s)), 4, "n=%zu", n); 195 p += 4; 196 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xc2, &s)), 0, "n=%zu", n); /* 3 */ 197 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa5, &s)), 1, "n=%zu", n); 198 p += 1; 199 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 4 */ 200 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n); 201 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa2, &s)), 5, "n=%zu", n); 202 p += 5; 203 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 5 */ 204 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n); 205 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa2, &s)), 2, "n=%zu", n); 206 p += 2; 207 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 6 */ 208 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n); 209 ATF_CHECK_EQ_MSG((n = c8rtomb(p, '\0', &s)), 4, "n=%zu", n); /* 7 */ 210 p += 4; 211 ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' && 212 (unsigned char)buf[1] == 0x1b && /* shift ISO/IEC 646:JP */ 213 (unsigned char)buf[2] == '(' && 214 (unsigned char)buf[3] == 'J' && 215 (unsigned char)buf[4] == 0x5c && /* YEN SIGN */ 216 (unsigned char)buf[5] == 0x5c && /* YEN SIGN */ 217 (unsigned char)buf[6] == 0x1b && /* shift JIS X 0208 */ 218 (unsigned char)buf[7] == '$' && 219 (unsigned char)buf[8] == 'B' && 220 (unsigned char)buf[9] == 0x25 && /* KATAKANA LETTER A */ 221 (unsigned char)buf[10] == 0x22 && 222 (unsigned char)buf[11] == 0x25 && /* KATAKANA LETTER A */ 223 (unsigned char)buf[12] == 0x22 && 224 (unsigned char)buf[13] == 0x1b && /* shift US-ASCII */ 225 (unsigned char)buf[14] == '(' && 226 (unsigned char)buf[15] == 'B' && 227 (unsigned char)buf[16] == '\0' && 228 (unsigned char)buf[17] == 0xcc), 229 "buf=[%02x %02x %02x %02x %02x %02x %02x %02x " 230 " %02x %02x %02x %02x %02x %02x %02x %02x " 231 " %02x %02x]", 232 buf[0], buf[1], buf[2], buf[3], 233 buf[4], buf[5], buf[6], buf[7], 234 buf[8], buf[9], buf[10], buf[11], 235 buf[12], buf[13], buf[14], buf[15], 236 buf[16], buf[17]); 237 } 238 239 ATF_TC_WITHOUT_HEAD(c8rtomb_iso_8859_1_test); 240 ATF_TC_BODY(c8rtomb_iso_8859_1_test, tc) 241 { 242 size_t n; 243 244 require_lc_ctype("en_US.ISO8859-1"); 245 246 /* Unicode character 'Euro sign'. */ 247 memset(&s, 0, sizeof(s)); 248 memset(buf, 0xcc, sizeof(buf)); 249 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xe2, &s)), 0, "n=%zu", n); 250 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x82, &s)), 0, "n=%zu", n); 251 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xac, &s)), (size_t)-1, 252 "n=%zu", n); 253 ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno); 254 ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]); 255 } 256 257 ATF_TC_WITHOUT_HEAD(c8rtomb_iso_8859_15_test); 258 ATF_TC_BODY(c8rtomb_iso_8859_15_test, tc) 259 { 260 size_t n; 261 262 require_lc_ctype("en_US.ISO8859-15"); 263 264 /* Unicode character 'Euro sign'. */ 265 memset(&s, 0, sizeof(s)); 266 memset(buf, 0xcc, sizeof(buf)); 267 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xe2, &s)), 0, "n=%zu", n); 268 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x82, &s)), 0, "n=%zu", n); 269 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xac, &s)), 1, "n=%zu", n); 270 ATF_CHECK_MSG(((unsigned char)buf[0] == 0xa4 && 271 (unsigned char)buf[1] == 0xcc), 272 "buf=[%02x %02x]", buf[0], buf[1]); 273 } 274 275 ATF_TC_WITHOUT_HEAD(c8rtomb_utf_8_test); 276 ATF_TC_BODY(c8rtomb_utf_8_test, tc) 277 { 278 size_t n; 279 280 require_lc_ctype("en_US.UTF-8"); 281 282 /* Unicode character 'Pile of poo'. */ 283 memset(&s, 0, sizeof(s)); 284 memset(buf, 0xcc, sizeof(buf)); 285 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n); 286 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n); 287 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n); 288 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xa9, &s)), 4, "n=%zu", n); 289 ATF_CHECK_MSG(((unsigned char)buf[0] == 0xf0 && 290 (unsigned char)buf[1] == 0x9f && 291 (unsigned char)buf[2] == 0x92 && 292 (unsigned char)buf[3] == 0xa9 && 293 (unsigned char)buf[4] == 0xcc), 294 "buf=[%02x %02x %02x %02x %02x]", 295 buf[0], buf[1], buf[2], buf[3], buf[4]); 296 297 /* Invalid code; 'Pile of poo' without the last byte. */ 298 memset(&s, 0, sizeof(s)); 299 memset(buf, 0xcc, sizeof(buf)); 300 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n); 301 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n); 302 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n); 303 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 'A', &s)), (size_t)-1, 304 "n=%zu", n); 305 ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno); 306 ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]); 307 308 /* Invalid code; 'Pile of poo' without the first byte. */ 309 memset(&s, 0, sizeof(s)); 310 memset(buf, 0xcc, sizeof(buf)); 311 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), (size_t)-1, 312 "n=%zu", n); 313 ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno); 314 ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]); 315 316 /* Incomplete Unicode character 'Pile of poo', interrupted by NUL. */ 317 memset(&s, 0, sizeof(s)); 318 memset(buf, 0xcc, sizeof(buf)); 319 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n); 320 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n); 321 ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' && 322 (unsigned char)buf[1] == 0xcc), 323 "buf=[%02x %02x]", buf[0], buf[1]); 324 325 memset(&s, 0, sizeof(s)); 326 memset(buf, 0xcc, sizeof(buf)); 327 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n); 328 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n); 329 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n); 330 ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' && 331 (unsigned char)buf[1] == 0xcc), 332 "buf=[%02x %02x]", buf[0], buf[1]); 333 334 memset(&s, 0, sizeof(s)); 335 memset(buf, 0xcc, sizeof(buf)); 336 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n); 337 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n); 338 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n); 339 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n); 340 ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' && 341 (unsigned char)buf[1] == 0xcc), 342 "buf=[%02x %02x]", buf[0], buf[1]); 343 } 344 345 ATF_TP_ADD_TCS(tp) 346 { 347 348 ATF_TP_ADD_TC(tp, c8rtomb_c_locale_test); 349 ATF_TP_ADD_TC(tp, c8rtomb_iso2022jp_locale_test); 350 ATF_TP_ADD_TC(tp, c8rtomb_iso_8859_1_test); 351 ATF_TP_ADD_TC(tp, c8rtomb_iso_8859_15_test); 352 ATF_TP_ADD_TC(tp, c8rtomb_utf_8_test); 353 354 return (atf_no_error()); 355 } 356