1 /* $NetBSD: mystrtok.c,v 1.3 2022/10/08 16:12:50 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* mystrtok 3 6 /* SUMMARY 7 /* safe tokenizer 8 /* SYNOPSIS 9 /* #include <stringops.h> 10 /* 11 /* char *mystrtok(bufp, delimiters) 12 /* char **bufp; 13 /* const char *delimiters; 14 /* 15 /* char *mystrtokq(bufp, delimiters, parens) 16 /* char **bufp; 17 /* const char *delimiters; 18 /* const char *parens; 19 /* 20 /* char *mystrtokdq(bufp, delimiters) 21 /* char **bufp; 22 /* const char *delimiters; 23 /* DESCRIPTION 24 /* mystrtok() splits a buffer on the specified \fIdelimiters\fR. 25 /* Tokens are delimited by runs of delimiters, so this routine 26 /* cannot return zero-length tokens. 27 /* 28 /* mystrtokq() is like mystrtok() but will not split text 29 /* between balanced parentheses. \fIparens\fR specifies the 30 /* opening and closing parenthesis (one of each). The set of 31 /* \fIparens\fR must be distinct from the set of \fIdelimiters\fR. 32 /* 33 /* mystrtokdq() is like mystrtok() but will not split text 34 /* between double quotes. The backslash character may be used 35 /* to escape characters. The double quote and backslash 36 /* character must not appear in the set of \fIdelimiters\fR. 37 /* 38 /* The \fIbufp\fR argument specifies the start of the search; it 39 /* is updated with each call. The input is destroyed. 40 /* 41 /* The result value is the next token, or a null pointer when the 42 /* end of the buffer was reached. 43 /* LICENSE 44 /* .ad 45 /* .fi 46 /* The Secure Mailer license must be distributed with this software. 47 /* AUTHOR(S) 48 /* Wietse Venema 49 /* IBM T.J. Watson Research 50 /* P.O. Box 704 51 /* Yorktown Heights, NY 10598, USA 52 /* 53 /* Wietse Venema 54 /* Google, Inc. 55 /* 111 8th Avenue 56 /* New York, NY 10011, USA 57 /*--*/ 58 59 /* System library. */ 60 61 #include "sys_defs.h" 62 #include <string.h> 63 64 /* Utility library. */ 65 66 #include "stringops.h" 67 68 /* mystrtok - safe tokenizer */ 69 70 char *mystrtok(char **src, const char *sep) 71 { 72 char *start = *src; 73 char *end; 74 75 /* 76 * Skip over leading delimiters. 77 */ 78 start += strspn(start, sep); 79 if (*start == 0) { 80 *src = start; 81 return (0); 82 } 83 84 /* 85 * Separate off one token. 86 */ 87 end = start + strcspn(start, sep); 88 if (*end != 0) 89 *end++ = 0; 90 *src = end; 91 return (start); 92 } 93 94 /* mystrtokq - safe tokenizer with quoting support */ 95 96 char *mystrtokq(char **src, const char *sep, const char *parens) 97 { 98 char *start = *src; 99 static char *cp; 100 int ch; 101 int level; 102 103 /* 104 * Skip over leading delimiters. 105 */ 106 start += strspn(start, sep); 107 if (*start == 0) { 108 *src = start; 109 return (0); 110 } 111 112 /* 113 * Parse out the next token. 114 */ 115 for (level = 0, cp = start; (ch = *(unsigned char *) cp) != 0; cp++) { 116 if (ch == parens[0]) { 117 level++; 118 } else if (level > 0 && ch == parens[1]) { 119 level--; 120 } else if (level == 0 && strchr(sep, ch) != 0) { 121 *cp++ = 0; 122 break; 123 } 124 } 125 *src = cp; 126 return (start); 127 } 128 129 /* mystrtokdq - safe tokenizer, double quote and backslash support */ 130 131 char *mystrtokdq(char **src, const char *sep) 132 { 133 char *cp = *src; 134 char *start; 135 136 /* 137 * Skip leading delimiters. 138 */ 139 cp += strspn(cp, sep); 140 141 /* 142 * Skip to next unquoted space or comma. 143 */ 144 if (*cp == 0) { 145 start = 0; 146 } else { 147 int in_quotes; 148 149 for (in_quotes = 0, start = cp; *cp; cp++) { 150 if (*cp == '\\') { 151 if (*++cp == 0) 152 break; 153 } else if (*cp == '"') { 154 in_quotes = !in_quotes; 155 } else if (!in_quotes && strchr(sep, *(unsigned char *) cp) != 0) { 156 *cp++ = 0; 157 break; 158 } 159 } 160 } 161 *src = cp; 162 return (start); 163 } 164 165 #ifdef TEST 166 167 /* 168 * Test program. 169 */ 170 #include "msg.h" 171 #include "mymalloc.h" 172 173 /* 174 * The following needs to be large enough to include a null terminator in 175 * every testcase.expected field. 176 */ 177 #define EXPECT_SIZE 5 178 179 struct testcase { 180 const char *action; 181 const char *input; 182 const char *expected[EXPECT_SIZE]; 183 }; 184 static const struct testcase testcases[] = { 185 {"mystrtok", ""}, 186 {"mystrtok", " foo ", {"foo"}}, 187 {"mystrtok", " foo bar ", {"foo", "bar"}}, 188 {"mystrtokq", ""}, 189 {"mystrtokq", "foo bar", {"foo", "bar"}}, 190 {"mystrtokq", "{ bar } ", {"{ bar }"}}, 191 {"mystrtokq", "foo { bar } baz", {"foo", "{ bar }", "baz"}}, 192 {"mystrtokq", "foo{ bar } baz", {"foo{ bar }", "baz"}}, 193 {"mystrtokq", "foo { bar }baz", {"foo", "{ bar }baz"}}, 194 {"mystrtokdq", ""}, 195 {"mystrtokdq", " foo ", {"foo"}}, 196 {"mystrtokdq", " foo bar ", {"foo", "bar"}}, 197 {"mystrtokdq", " foo\\ bar ", {"foo\\ bar"}}, 198 {"mystrtokdq", " foo \\\" bar", {"foo", "\\\"", "bar"}}, 199 {"mystrtokdq", " foo \" bar baz\" ", {"foo", "\" bar baz\""}}, 200 }; 201 202 int main(void) 203 { 204 const struct testcase *tp; 205 char *actual; 206 int pass; 207 int fail; 208 int match; 209 int n; 210 211 #define NUM_TESTS sizeof(testcases)/sizeof(testcases[0]) 212 #define STR_OR_NULL(s) ((s) ? (s) : "null") 213 214 for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) { 215 char *saved_input = mystrdup(tp->input); 216 char *cp = saved_input; 217 218 msg_info("RUN test case %ld %s >%s<", 219 (long) (tp - testcases), tp->action, tp->input); 220 #if 0 221 msg_info("action=%s", tp->action); 222 msg_info("input=%s", tp->input); 223 for (n = 0; tp->expected[n]; tp++) 224 msg_info("expected[%d]=%s", n, tp->expected[n]); 225 #endif 226 227 for (n = 0; n < EXPECT_SIZE; n++) { 228 if (strcmp(tp->action, "mystrtok") == 0) { 229 actual = mystrtok(&cp, CHARS_SPACE); 230 } else if (strcmp(tp->action, "mystrtokq") == 0) { 231 actual = mystrtokq(&cp, CHARS_SPACE, CHARS_BRACE); 232 } else if (strcmp(tp->action, "mystrtokdq") == 0) { 233 actual = mystrtokdq(&cp, CHARS_SPACE); 234 } else { 235 msg_panic("invalid command: %s", tp->action); 236 } 237 if ((match = (actual && tp->expected[n]) ? 238 (strcmp(actual, tp->expected[n]) == 0) : 239 (actual == tp->expected[n])) != 0) { 240 if (actual == 0) { 241 msg_info("PASS test %ld", (long) (tp - testcases)); 242 pass++; 243 break; 244 } 245 } else { 246 msg_warn("expected: >%s<, got: >%s<", 247 STR_OR_NULL(tp->expected[n]), STR_OR_NULL(actual)); 248 msg_info("FAIL test %ld", (long) (tp - testcases)); 249 fail++; 250 break; 251 } 252 } 253 if (n >= EXPECT_SIZE) 254 msg_panic("need to increase EXPECT_SIZE"); 255 myfree(saved_input); 256 } 257 return (fail > 0); 258 } 259 260 #endif 261