1 /* $NetBSD: mystrtok.c,v 1.4 2023/12/23 20:30:46 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* mystrtok 3 6 /* SUMMARY 7 /* safe tokenizer 8 /* SYNOPSIS 9 /* #include <stringops.h> 10 /* 11 /* char *mystrtok(bufp, delimiters) 12 /* char **bufp; 13 /* const char *delimiters; 14 /* 15 /* char *mystrtokq(bufp, delimiters, parens) 16 /* char **bufp; 17 /* const char *delimiters; 18 /* const char *parens; 19 /* 20 /* char *mystrtokdq(bufp, delimiters) 21 /* char **bufp; 22 /* const char *delimiters; 23 /* 24 /* char *mystrtok_cw(bufp, delimiters, blame) 25 /* char **bufp; 26 /* const char *delimiters; 27 /* const char *blame; 28 /* 29 /* char *mystrtokq_cw(bufp, delimiters, parens, blame) 30 /* char **bufp; 31 /* const char *delimiters; 32 /* const char *parens; 33 /* const char *blame; 34 /* 35 /* char *mystrtokdq_cw(bufp, delimiters, blame) 36 /* char **bufp; 37 /* const char *delimiters; 38 /* const char *blame; 39 /* DESCRIPTION 40 /* mystrtok() splits a buffer on the specified \fIdelimiters\fR. 41 /* Tokens are delimited by runs of delimiters, so this routine 42 /* cannot return zero-length tokens. 43 /* 44 /* mystrtokq() is like mystrtok() but will not split text 45 /* between balanced parentheses. \fIparens\fR specifies the 46 /* opening and closing parenthesis (one of each). The set of 47 /* \fIparens\fR must be distinct from the set of \fIdelimiters\fR. 48 /* 49 /* mystrtokdq() is like mystrtok() but will not split text 50 /* between double quotes. The backslash character may be used 51 /* to escape characters. The double quote and backslash 52 /* character must not appear in the set of \fIdelimiters\fR. 53 /* 54 /* The \fIbufp\fR argument specifies the start of the search; it 55 /* is updated with each call. The input is destroyed. 56 /* 57 /* The result value is the next token, or a null pointer when the 58 /* end of the buffer was reached. 59 /* 60 /* mystrtok_cw(), mystrtokq_cw(), and mystrtokdq_cw, log a 61 /* warning and return null when the result would look like 62 /* comment. The \fBblame\fR argument provides context for 63 /* warning messages. Specify a null pointer to disable the 64 /* comment check. 65 /* LICENSE 66 /* .ad 67 /* .fi 68 /* The Secure Mailer license must be distributed with this software. 69 /* AUTHOR(S) 70 /* Wietse Venema 71 /* IBM T.J. Watson Research 72 /* P.O. Box 704 73 /* Yorktown Heights, NY 10598, USA 74 /* 75 /* Wietse Venema 76 /* Google, Inc. 77 /* 111 8th Avenue 78 /* New York, NY 10011, USA 79 /*--*/ 80 81 /* System library. */ 82 83 #include <sys_defs.h> 84 #include <string.h> 85 86 /* Utility library. */ 87 88 #include <msg.h> 89 #include <stringops.h> 90 91 /* mystrtok_warn - warn for #comment after other text */ 92 93 static void mystrtok_warn(const char *start, const char *bufp, const char *blame) 94 { 95 msg_warn("%s: #comment after other text is not allowed: %s %.20s...", 96 blame, start, bufp); 97 } 98 99 /* mystrtok - ABI compatibility wrapper */ 100 101 #undef mystrtok 102 103 char *mystrtok(char **src, const char *sep) 104 { 105 return (mystrtok_cw(src, sep, (char *) 0)); 106 } 107 108 /* mystrtok - safe tokenizer */ 109 110 char *mystrtok_cw(char **src, const char *sep, const char *blame) 111 { 112 char *start = *src; 113 char *end; 114 115 /* 116 * Skip over leading delimiters. 117 */ 118 start += strspn(start, sep); 119 if (*start == 0) { 120 *src = start; 121 return (0); 122 } 123 124 /* 125 * Separate off one token. 126 */ 127 end = start + strcspn(start, sep); 128 if (*end != 0) 129 *end++ = 0; 130 *src = end; 131 132 if (blame && *start == '#') { 133 mystrtok_warn(start, *src, blame); 134 return (0); 135 } else { 136 return (start); 137 } 138 } 139 140 /* mystrtokq - ABI compatibility wrapper */ 141 142 #undef mystrtokq 143 144 char *mystrtokq(char **src, const char *sep, const char *parens) 145 { 146 return (mystrtokq_cw(src, sep, parens, (char *) 0)); 147 } 148 149 /* mystrtokq_cw - safe tokenizer with quoting support */ 150 151 char *mystrtokq_cw(char **src, const char *sep, const char *parens, 152 const char *blame) 153 { 154 char *start = *src; 155 static char *cp; 156 int ch; 157 int level; 158 159 /* 160 * Skip over leading delimiters. 161 */ 162 start += strspn(start, sep); 163 if (*start == 0) { 164 *src = start; 165 return (0); 166 } 167 168 /* 169 * Parse out the next token. 170 */ 171 for (level = 0, cp = start; (ch = *(unsigned char *) cp) != 0; cp++) { 172 if (ch == parens[0]) { 173 level++; 174 } else if (level > 0 && ch == parens[1]) { 175 level--; 176 } else if (level == 0 && strchr(sep, ch) != 0) { 177 *cp++ = 0; 178 break; 179 } 180 } 181 *src = cp; 182 183 if (blame && *start == '#') { 184 mystrtok_warn(start, *src, blame); 185 return (0); 186 } else { 187 return (start); 188 } 189 } 190 191 /* mystrtokdq - ABI compatibility wrapper */ 192 193 #undef mystrtokdq 194 195 char *mystrtokdq(char **src, const char *sep) 196 { 197 return (mystrtokdq_cw(src, sep, (char *) 0)); 198 } 199 200 /* mystrtokdq_cw - safe tokenizer, double quote and backslash support */ 201 202 char *mystrtokdq_cw(char **src, const char *sep, const char *blame) 203 { 204 char *cp = *src; 205 char *start; 206 207 /* 208 * Skip leading delimiters. 209 */ 210 cp += strspn(cp, sep); 211 212 /* 213 * Skip to next unquoted space or comma. 214 */ 215 if (*cp == 0) { 216 start = 0; 217 } else { 218 int in_quotes; 219 220 for (in_quotes = 0, start = cp; *cp; cp++) { 221 if (*cp == '\\') { 222 if (*++cp == 0) 223 break; 224 } else if (*cp == '"') { 225 in_quotes = !in_quotes; 226 } else if (!in_quotes && strchr(sep, *(unsigned char *) cp) != 0) { 227 *cp++ = 0; 228 break; 229 } 230 } 231 } 232 *src = cp; 233 234 if (blame && start && *start == '#') { 235 mystrtok_warn(start, *src, blame); 236 return (0); 237 } else { 238 return (start); 239 } 240 } 241 242 #ifdef TEST 243 244 /* 245 * Test program. 246 */ 247 #include "msg.h" 248 #include "mymalloc.h" 249 250 /* 251 * The following needs to be large enough to include a null terminator in 252 * every testcase.expected field. 253 */ 254 #define EXPECT_SIZE 5 255 256 struct testcase { 257 const char *action; 258 const char *input; 259 const char *expected[EXPECT_SIZE]; 260 }; 261 static const struct testcase testcases[] = { 262 {"mystrtok", ""}, 263 {"mystrtok", " foo ", {"foo"}}, 264 {"mystrtok", " foo bar ", {"foo", "bar"}}, 265 {"mystrtokq", ""}, 266 {"mystrtokq", "foo bar", {"foo", "bar"}}, 267 {"mystrtokq", "{ bar } ", {"{ bar }"}}, 268 {"mystrtokq", "foo { bar } baz", {"foo", "{ bar }", "baz"}}, 269 {"mystrtokq", "foo{ bar } baz", {"foo{ bar }", "baz"}}, 270 {"mystrtokq", "foo { bar }baz", {"foo", "{ bar }baz"}}, 271 {"mystrtokdq", ""}, 272 {"mystrtokdq", " foo ", {"foo"}}, 273 {"mystrtokdq", " foo bar ", {"foo", "bar"}}, 274 {"mystrtokdq", " foo\\ bar ", {"foo\\ bar"}}, 275 {"mystrtokdq", " foo \\\" bar", {"foo", "\\\"", "bar"}}, 276 {"mystrtokdq", " foo \" bar baz\" ", {"foo", "\" bar baz\""}}, 277 {"mystrtok_cw", "#after text"}, 278 {"mystrtok_cw", "before-text #after text", {"before-text"}}, 279 {"mystrtokq_cw", "#after text"}, 280 {"mystrtokq_cw", "{ before text } #after text", "{ before text }"}, 281 {"mystrtokdq_cw", "#after text"}, 282 {"mystrtokdq_cw", "\"before text\" #after text", {"\"before text\""}}, 283 }; 284 285 int main(void) 286 { 287 const struct testcase *tp; 288 char *actual; 289 int pass; 290 int fail; 291 int match; 292 int n; 293 294 #define NUM_TESTS sizeof(testcases)/sizeof(testcases[0]) 295 #define STR_OR_NULL(s) ((s) ? (s) : "null") 296 297 for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) { 298 char *saved_input = mystrdup(tp->input); 299 char *cp = saved_input; 300 301 msg_info("RUN test case %ld %s >%s<", 302 (long) (tp - testcases), tp->action, tp->input); 303 #if 0 304 msg_info("action=%s", tp->action); 305 msg_info("input=%s", tp->input); 306 for (n = 0; tp->expected[n]; tp++) 307 msg_info("expected[%d]=%s", n, tp->expected[n]); 308 #endif 309 310 for (n = 0; n < EXPECT_SIZE; n++) { 311 if (strcmp(tp->action, "mystrtok") == 0) { 312 actual = mystrtok(&cp, CHARS_SPACE); 313 } else if (strcmp(tp->action, "mystrtokq") == 0) { 314 actual = mystrtokq(&cp, CHARS_SPACE, CHARS_BRACE); 315 } else if (strcmp(tp->action, "mystrtokdq") == 0) { 316 actual = mystrtokdq(&cp, CHARS_SPACE); 317 } else if (strcmp(tp->action, "mystrtok_cw") == 0) { 318 actual = mystrtok_cw(&cp, CHARS_SPACE, "test"); 319 } else if (strcmp(tp->action, "mystrtokq_cw") == 0) { 320 actual = mystrtokq_cw(&cp, CHARS_SPACE, CHARS_BRACE, "test"); 321 } else if (strcmp(tp->action, "mystrtokdq_cw") == 0) { 322 actual = mystrtokdq_cw(&cp, CHARS_SPACE, "test"); 323 } else { 324 msg_panic("invalid command: %s", tp->action); 325 } 326 if ((match = (actual && tp->expected[n]) ? 327 (strcmp(actual, tp->expected[n]) == 0) : 328 (actual == tp->expected[n])) != 0) { 329 if (actual == 0) { 330 msg_info("PASS test %ld", (long) (tp - testcases)); 331 pass++; 332 break; 333 } 334 } else { 335 msg_warn("expected: >%s<, got: >%s<", 336 STR_OR_NULL(tp->expected[n]), STR_OR_NULL(actual)); 337 msg_info("FAIL test %ld", (long) (tp - testcases)); 338 fail++; 339 break; 340 } 341 } 342 if (n >= EXPECT_SIZE) 343 msg_panic("need to increase EXPECT_SIZE"); 344 myfree(saved_input); 345 } 346 return (fail > 0); 347 } 348 349 #endif 350