1 /* $NetBSD: t_c16rtomb.c,v 1.6 2024/08/19 16:22:10 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 2002 Tim J. Robbins 5 * All rights reserved. 6 * 7 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 /* 32 * Test program for c16rtomb() as specified by ISO/IEC 9899:2011. 33 */ 34 35 #include <sys/cdefs.h> 36 __RCSID("$NetBSD: t_c16rtomb.c,v 1.6 2024/08/19 16:22:10 riastradh Exp $"); 37 38 #include <errno.h> 39 #include <limits.h> 40 #include <locale.h> 41 #include <stdio.h> 42 #include <string.h> 43 #include <uchar.h> 44 45 #include <atf-c.h> 46 47 static void 48 require_lc_ctype(const char *locale_name) 49 { 50 char *lc_ctype_set; 51 52 lc_ctype_set = setlocale(LC_CTYPE, locale_name); 53 if (lc_ctype_set == NULL) 54 atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d", 55 locale_name, errno); 56 57 ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0, 58 "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name); 59 } 60 61 static mbstate_t s; 62 static char buf[7*MB_LEN_MAX + 1]; 63 64 ATF_TC_WITHOUT_HEAD(c16rtomb_c_locale_test); 65 ATF_TC_BODY(c16rtomb_c_locale_test, tc) 66 { 67 size_t n; 68 69 require_lc_ctype("C"); 70 71 /* 72 * If the buffer argument is NULL, c16 is implicitly 0, 73 * c16rtomb() resets its internal state. 74 */ 75 ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n); 76 ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, 0xdc00, NULL)), 1, "n=%zu", n); 77 78 /* Null wide character. */ 79 memset(&s, 0, sizeof(s)); 80 memset(buf, 0xcc, sizeof(buf)); 81 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0, &s)), 1, "n=%zu", n); 82 ATF_CHECK_MSG(((unsigned char)buf[0] == 0 && 83 (unsigned char)buf[1] == 0xcc), 84 "buf=[%02x %02x]", buf[0], buf[1]); 85 86 /* Latin letter A, internal state. */ 87 ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n); 88 ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'A', NULL)), 1, "n=%zu", n); 89 90 /* Latin letter A. */ 91 memset(&s, 0, sizeof(s)); 92 memset(buf, 0xcc, sizeof(buf)); 93 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'A', &s)), 1, "n=%zu", n); 94 ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' && 95 (unsigned char)buf[1] == 0xcc), 96 "buf=[%02x %02x]", buf[0], buf[1]); 97 98 /* Unicode character 'Pile of poo'. */ 99 memset(&s, 0, sizeof(s)); 100 memset(buf, 0xcc, sizeof(buf)); 101 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n); 102 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), (size_t)-1, 103 "n=%zu", n); 104 ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno); 105 ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]); 106 107 /* Incomplete Unicode character 'Pile of poo', interrupted by NUL. */ 108 memset(&s, 0, sizeof(s)); 109 memset(buf, 0xcc, sizeof(buf)); 110 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n); 111 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'\0', &s)), 1, "n=%zu", n); 112 ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' && 113 (unsigned char)buf[1] == 0xcc), 114 "buf=[%02x %02x]", buf[0], buf[1]); 115 } 116 117 ATF_TC_WITHOUT_HEAD(c16rtomb_iso2022jp_locale_test); 118 ATF_TC_BODY(c16rtomb_iso2022jp_locale_test, tc) 119 { 120 char *p; 121 size_t n; 122 123 require_lc_ctype("ja_JP.ISO-2022-JP"); 124 125 /* 126 * If the buffer argument is NULL, c16 is implicitly 0, 127 * c16rtomb() resets its internal state. 128 */ 129 ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n); 130 ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, 0xdc00, NULL)), 1, "n=%zu", n); 131 132 /* Null wide character. */ 133 memset(&s, 0, sizeof(s)); 134 memset(buf, 0xcc, sizeof(buf)); 135 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0, &s)), 1, "n=%zu", n); 136 ATF_CHECK_MSG(((unsigned char)buf[0] == 0 && 137 (unsigned char)buf[1] == 0xcc), 138 "buf=[%02x %02x]", buf[0], buf[1]); 139 140 /* Latin letter A, internal state. */ 141 ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n); 142 ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'A', NULL)), 1, "n=%zu", n); 143 144 /* 145 * 1. U+0042 LATIN CAPITAL LETTER A 146 * 2. U+00A5 YEN SIGN 147 * 3. U+00A5 YEN SIGN (again, no shift needed) 148 * 4. U+30A2 KATAKANA LETTER A 149 * 5. U+30A2 KATAKANA LETTER A (again, no shift needed) 150 * 6. incomplete UTF-16 surrogate pair -- no output 151 * 7. U+0000 NUL (plus shift sequence to initial state) 152 */ 153 memset(&s, 0, sizeof(s)); 154 memset(buf, 0xcc, sizeof(buf)); 155 p = buf; 156 ATF_CHECK_EQ_MSG((n = c16rtomb(p, L'A', &s)), 1, "n=%zu", n); /* 1 */ 157 p += 1; 158 ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xa5, &s)), 4, "n=%zu", n); /* 2 */ 159 p += 4; 160 ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xa5, &s)), 1, "n=%zu", n); /* 3 */ 161 p += 1; 162 ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0x30a2, &s)), 5, "n=%zu", n); /* 4 */ 163 p += 5; 164 ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0x30a2, &s)), 2, "n=%zu", n); /* 5 */ 165 p += 2; 166 ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xd800, &s)), 0, "n=%zu", n); /* 6 */ 167 ATF_CHECK_EQ_MSG((n = c16rtomb(p, L'\0', &s)), 4, "n=%zu", n); /* 7 */ 168 p += 4; 169 ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' && 170 (unsigned char)buf[1] == 0x1b && /* shift ISO/IEC 646:JP */ 171 (unsigned char)buf[2] == '(' && 172 (unsigned char)buf[3] == 'J' && 173 (unsigned char)buf[4] == 0x5c && /* YEN SIGN */ 174 (unsigned char)buf[5] == 0x5c && /* YEN SIGN */ 175 (unsigned char)buf[6] == 0x1b && /* shift JIS X 0208 */ 176 (unsigned char)buf[7] == '$' && 177 (unsigned char)buf[8] == 'B' && 178 (unsigned char)buf[9] == 0x25 && /* KATAKANA LETTER A */ 179 (unsigned char)buf[10] == 0x22 && 180 (unsigned char)buf[11] == 0x25 && /* KATAKANA LETTER A */ 181 (unsigned char)buf[12] == 0x22 && 182 (unsigned char)buf[13] == 0x1b && /* shift US-ASCII */ 183 (unsigned char)buf[14] == '(' && 184 (unsigned char)buf[15] == 'B' && 185 (unsigned char)buf[16] == '\0' && 186 (unsigned char)buf[17] == 0xcc), 187 "buf=[%02x %02x %02x %02x %02x %02x %02x %02x " 188 " %02x %02x %02x %02x %02x %02x %02x %02x " 189 " %02x %02x]", 190 buf[0], buf[1], buf[2], buf[3], 191 buf[4], buf[5], buf[6], buf[7], 192 buf[8], buf[9], buf[10], buf[11], 193 buf[12], buf[13], buf[14], buf[15], 194 buf[16], buf[17]); 195 } 196 197 ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_1_test); 198 ATF_TC_BODY(c16rtomb_iso_8859_1_test, tc) 199 { 200 size_t n; 201 202 require_lc_ctype("en_US.ISO8859-1"); 203 204 /* Unicode character 'Euro sign'. */ 205 memset(&s, 0, sizeof(s)); 206 memset(buf, 0xcc, sizeof(buf)); 207 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0x20ac, &s)), (size_t)-1, 208 "n=%zu", n); 209 ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno); 210 ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]); 211 } 212 213 ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_15_test); 214 ATF_TC_BODY(c16rtomb_iso_8859_15_test, tc) 215 { 216 size_t n; 217 218 require_lc_ctype("en_US.ISO8859-15"); 219 220 /* Unicode character 'Euro sign'. */ 221 memset(&s, 0, sizeof(s)); 222 memset(buf, 0xcc, sizeof(buf)); 223 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0x20ac, &s)), 1, "n=%zu", n); 224 ATF_CHECK_MSG(((unsigned char)buf[0] == 0xa4 && 225 (unsigned char)buf[1] == 0xcc), 226 "buf=[%02x %02x]", buf[0], buf[1]); 227 } 228 229 ATF_TC_WITHOUT_HEAD(c16rtomb_utf_8_test); 230 ATF_TC_BODY(c16rtomb_utf_8_test, tc) 231 { 232 size_t n; 233 234 require_lc_ctype("en_US.UTF-8"); 235 236 /* Unicode character 'Pile of poo'. */ 237 memset(&s, 0, sizeof(s)); 238 memset(buf, 0xcc, sizeof(buf)); 239 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n); 240 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), 4, "n=%zu", n); 241 ATF_CHECK_MSG(((unsigned char)buf[0] == 0xf0 && 242 (unsigned char)buf[1] == 0x9f && 243 (unsigned char)buf[2] == 0x92 && 244 (unsigned char)buf[3] == 0xa9 && 245 (unsigned char)buf[4] == 0xcc), 246 "buf=[%02x %02x %02x %02x %02x]", 247 buf[0], buf[1], buf[2], buf[3], buf[4]); 248 249 /* Invalid code; 'Pile of poo' without the trail surrogate. */ 250 memset(&s, 0, sizeof(s)); 251 memset(buf, 0xcc, sizeof(buf)); 252 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n); 253 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'A', &s)), (size_t)-1, 254 "n=%zu", n); 255 ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno); 256 ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]); 257 258 /* Invalid code; 'Pile of poo' without the lead surrogate. */ 259 memset(&s, 0, sizeof(s)); 260 memset(buf, 0xcc, sizeof(buf)); 261 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), (size_t)-1, 262 "n=%zu", n); 263 ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno); 264 ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]); 265 266 /* Incomplete Unicode character 'Pile of poo', interrupted by NUL. */ 267 memset(&s, 0, sizeof(s)); 268 memset(buf, 0xcc, sizeof(buf)); 269 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n); 270 ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'\0', &s)), 1, 271 "n=%zu", n); 272 ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' && 273 (unsigned char)buf[1] == 0xcc), 274 "buf=[%02x %02x]", buf[0], buf[1]); 275 } 276 277 ATF_TP_ADD_TCS(tp) 278 { 279 280 ATF_TP_ADD_TC(tp, c16rtomb_c_locale_test); 281 ATF_TP_ADD_TC(tp, c16rtomb_iso2022jp_locale_test); 282 ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_1_test); 283 ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_15_test); 284 ATF_TP_ADD_TC(tp, c16rtomb_utf_8_test); 285 286 return (atf_no_error()); 287 } 288