1 /* $NetBSD: mystrtok.c,v 1.4 2023/12/23 20:30:46 christos Exp $ */
2
3 /*++
4 /* NAME
5 /* mystrtok 3
6 /* SUMMARY
7 /* safe tokenizer
8 /* SYNOPSIS
9 /* #include <stringops.h>
10 /*
11 /* char *mystrtok(bufp, delimiters)
12 /* char **bufp;
13 /* const char *delimiters;
14 /*
15 /* char *mystrtokq(bufp, delimiters, parens)
16 /* char **bufp;
17 /* const char *delimiters;
18 /* const char *parens;
19 /*
20 /* char *mystrtokdq(bufp, delimiters)
21 /* char **bufp;
22 /* const char *delimiters;
23 /*
24 /* char *mystrtok_cw(bufp, delimiters, blame)
25 /* char **bufp;
26 /* const char *delimiters;
27 /* const char *blame;
28 /*
29 /* char *mystrtokq_cw(bufp, delimiters, parens, blame)
30 /* char **bufp;
31 /* const char *delimiters;
32 /* const char *parens;
33 /* const char *blame;
34 /*
35 /* char *mystrtokdq_cw(bufp, delimiters, blame)
36 /* char **bufp;
37 /* const char *delimiters;
38 /* const char *blame;
39 /* DESCRIPTION
40 /* mystrtok() splits a buffer on the specified \fIdelimiters\fR.
41 /* Tokens are delimited by runs of delimiters, so this routine
42 /* cannot return zero-length tokens.
43 /*
44 /* mystrtokq() is like mystrtok() but will not split text
45 /* between balanced parentheses. \fIparens\fR specifies the
46 /* opening and closing parenthesis (one of each). The set of
47 /* \fIparens\fR must be distinct from the set of \fIdelimiters\fR.
48 /*
49 /* mystrtokdq() is like mystrtok() but will not split text
50 /* between double quotes. The backslash character may be used
51 /* to escape characters. The double quote and backslash
52 /* character must not appear in the set of \fIdelimiters\fR.
53 /*
54 /* The \fIbufp\fR argument specifies the start of the search; it
55 /* is updated with each call. The input is destroyed.
56 /*
57 /* The result value is the next token, or a null pointer when the
58 /* end of the buffer was reached.
59 /*
60 /* mystrtok_cw(), mystrtokq_cw(), and mystrtokdq_cw() log a
61 /* warning and return null when the result would look like
62 /* a comment (the token starts with '#'). The \fBblame\fR
63 /* argument provides context for warning messages. Specify a
64 /* null pointer to disable the comment check.
65 /* LICENSE
66 /* .ad
67 /* .fi
68 /* The Secure Mailer license must be distributed with this software.
69 /* AUTHOR(S)
70 /* Wietse Venema
71 /* IBM T.J. Watson Research
72 /* P.O. Box 704
73 /* Yorktown Heights, NY 10598, USA
74 /*
75 /* Wietse Venema
76 /* Google, Inc.
77 /* 111 8th Avenue
78 /* New York, NY 10011, USA
79 /*--*/
80
81 /* System library. */
82
83 #include <sys_defs.h>
84 #include <string.h>
85
86 /* Utility library. */
87
88 #include <msg.h>
89 #include <stringops.h>
90
/* mystrtok_warn - warn that a #comment is not allowed after other text */

static void mystrtok_warn(const char *start, const char *bufp,
                          const char *blame)
{

    /*
     * The message shows the caller-supplied context (blame), the offending
     * token (start), and at most the first 20 characters of the text that
     * follows it (bufp).
     */
    msg_warn("%s: #comment after other text is not allowed: %s %.20s...",
             blame, start, bufp);
}
98
/* mystrtok - ABI compatibility wrapper: mystrtok_cw() without comment check */

/*
 * NOTE(review): presumably <stringops.h> defines mystrtok as a macro; the
 * #undef allows a real function with that name to be defined here. Confirm
 * against the header.
 */
#undef mystrtok

char   *mystrtok(char **src, const char *sep)
{
    const char *no_blame = 0;

    /* A null blame pointer disables the '#' check in mystrtok_cw(). */
    return (mystrtok_cw(src, sep, no_blame));
}
107
/* mystrtok_cw - safe tokenizer with optional #comment check */

char   *mystrtok_cw(char **src, const char *sep, const char *blame)
{
    char   *token;
    char   *rest;

    /*
     * Step over the run of delimiters in front of the token. When nothing
     * but delimiters remain, leave *src at the terminator and report the end
     * of the buffer.
     */
    token = *src + strspn(*src, sep);
    if (*token == '\0') {
        *src = token;
        return (0);
    }

    /*
     * Find the end of the token. When a delimiter follows, overwrite it with
     * a terminator and resume the next search just past it; otherwise the
     * next search starts at the end of the buffer.
     */
    rest = token + strcspn(token, sep);
    if (*rest != '\0') {
        *rest = '\0';
        rest++;
    }
    *src = rest;

    /*
     * With a non-null blame, a token that starts with '#' is reported and
     * suppressed instead of being returned.
     */
    if (blame == 0 || *token != '#')
        return (token);
    mystrtok_warn(token, rest, blame);
    return (0);
}
139
/* mystrtokq - ABI compatibility wrapper: mystrtokq_cw() without comment check */

/*
 * NOTE(review): presumably <stringops.h> defines mystrtokq as a macro; the
 * #undef allows a real function with that name to be defined here. Confirm
 * against the header.
 */
#undef mystrtokq

char   *mystrtokq(char **src, const char *sep, const char *parens)
{
    const char *no_blame = 0;

    /* A null blame pointer disables the '#' check in mystrtokq_cw(). */
    return (mystrtokq_cw(src, sep, parens, no_blame));
}
148
/* mystrtokq_cw - safe tokenizer that does not split balanced parentheses */

/*
 * src:    address of the search position; updated on every call.
 * sep:    set of delimiter characters.
 * parens: opening parenthesis in parens[0], closing one in parens[1].
 * blame:  context for the warning, or null to disable the '#' check.
 *
 * Returns the next token, or null at the end of the buffer or when the
 * token starts with '#' and blame is not null. The input is modified.
 */
char   *mystrtokq_cw(char **src, const char *sep, const char *parens,
                     const char *blame)
{
    char   *start = *src;
    char   *cp;                         /* automatic: keeps this reentrant */
    int     ch;
    int     level;

    /*
     * Skip over leading delimiters.
     */
    start += strspn(start, sep);
    if (*start == 0) {
        *src = start;
        return (0);
    }

    /*
     * Parse out the next token. Delimiters end the token only at nesting
     * level zero. The parenthesis characters are compared as unsigned char,
     * like ch, so that characters with the high bit set also match when
     * plain char is signed. parens[1] is examined only while inside
     * parentheses.
     */
    for (level = 0, cp = start; (ch = *(unsigned char *) cp) != 0; cp++) {
        if (ch == (unsigned char) parens[0]) {
            level++;
        } else if (level > 0 && ch == (unsigned char) parens[1]) {
            level--;
        } else if (level == 0 && strchr(sep, ch) != 0) {
            /* Terminate the token; the next search starts after it. */
            *cp++ = 0;
            break;
        }
    }
    *src = cp;

    /*
     * With a non-null blame, a token that starts with '#' is reported and
     * suppressed instead of being returned.
     */
    if (blame && *start == '#') {
        mystrtok_warn(start, *src, blame);
        return (0);
    } else {
        return (start);
    }
}
190
/* mystrtokdq - ABI compatibility wrapper: mystrtokdq_cw() without comment check */

/*
 * NOTE(review): presumably <stringops.h> defines mystrtokdq as a macro; the
 * #undef allows a real function with that name to be defined here. Confirm
 * against the header.
 */
#undef mystrtokdq

char   *mystrtokdq(char **src, const char *sep)
{
    const char *no_blame = 0;

    /* A null blame pointer disables the '#' check in mystrtokdq_cw(). */
    return (mystrtokdq_cw(src, sep, no_blame));
}
199
/* mystrtokdq_cw - safe tokenizer, double quote and backslash support */

char   *mystrtokdq_cw(char **src, const char *sep, const char *blame)
{
    char   *scan;
    char   *token;
    int     quoted = 0;

    /*
     * Step over leading delimiters. When nothing else remains, leave *src at
     * the terminator and report the end of the buffer.
     */
    scan = *src + strspn(*src, sep);
    if (*scan == '\0') {
        *src = scan;
        return (0);
    }

    /*
     * Advance to the next delimiter that is neither inside double quotes nor
     * escaped with a backslash.
     */
    token = scan;
    while (*scan != '\0') {
        if (*scan == '\\') {
            /* Skip the escaped character; a trailing backslash ends it. */
            scan++;
            if (*scan == '\0')
                break;
        } else if (*scan == '"') {
            quoted = !quoted;
        } else if (!quoted && strchr(sep, *(unsigned char *) scan) != 0) {
            /* Terminate the token; the next search starts after it. */
            *scan++ = '\0';
            break;
        }
        scan++;
    }
    *src = scan;

    /*
     * With a non-null blame, a token that starts with '#' is reported and
     * suppressed instead of being returned.
     */
    if (blame == 0 || *token != '#')
        return (token);
    mystrtok_warn(token, scan, blame);
    return (0);
}
241
242 #ifdef TEST
243
244 /*
245 * Test program.
246 */
247 #include "msg.h"
248 #include "mymalloc.h"
249
250 /*
251 * The following needs to be large enough to include a null terminator in
252 * every testcase.expected field.
253 */
254 #define EXPECT_SIZE 5
255
256 struct testcase {
257 const char *action;
258 const char *input;
259 const char *expected[EXPECT_SIZE];
260 };
261 static const struct testcase testcases[] = {
262 {"mystrtok", ""},
263 {"mystrtok", " foo ", {"foo"}},
264 {"mystrtok", " foo bar ", {"foo", "bar"}},
265 {"mystrtokq", ""},
266 {"mystrtokq", "foo bar", {"foo", "bar"}},
267 {"mystrtokq", "{ bar } ", {"{ bar }"}},
268 {"mystrtokq", "foo { bar } baz", {"foo", "{ bar }", "baz"}},
269 {"mystrtokq", "foo{ bar } baz", {"foo{ bar }", "baz"}},
270 {"mystrtokq", "foo { bar }baz", {"foo", "{ bar }baz"}},
271 {"mystrtokdq", ""},
272 {"mystrtokdq", " foo ", {"foo"}},
273 {"mystrtokdq", " foo bar ", {"foo", "bar"}},
274 {"mystrtokdq", " foo\\ bar ", {"foo\\ bar"}},
275 {"mystrtokdq", " foo \\\" bar", {"foo", "\\\"", "bar"}},
276 {"mystrtokdq", " foo \" bar baz\" ", {"foo", "\" bar baz\""}},
277 {"mystrtok_cw", "#after text"},
278 {"mystrtok_cw", "before-text #after text", {"before-text"}},
279 {"mystrtokq_cw", "#after text"},
280 {"mystrtokq_cw", "{ before text } #after text", "{ before text }"},
281 {"mystrtokdq_cw", "#after text"},
282 {"mystrtokdq_cw", "\"before text\" #after text", {"\"before text\""}},
283 };
284
main(void)285 int main(void)
286 {
287 const struct testcase *tp;
288 char *actual;
289 int pass;
290 int fail;
291 int match;
292 int n;
293
294 #define NUM_TESTS sizeof(testcases)/sizeof(testcases[0])
295 #define STR_OR_NULL(s) ((s) ? (s) : "null")
296
297 for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) {
298 char *saved_input = mystrdup(tp->input);
299 char *cp = saved_input;
300
301 msg_info("RUN test case %ld %s >%s<",
302 (long) (tp - testcases), tp->action, tp->input);
303 #if 0
304 msg_info("action=%s", tp->action);
305 msg_info("input=%s", tp->input);
306 for (n = 0; tp->expected[n]; tp++)
307 msg_info("expected[%d]=%s", n, tp->expected[n]);
308 #endif
309
310 for (n = 0; n < EXPECT_SIZE; n++) {
311 if (strcmp(tp->action, "mystrtok") == 0) {
312 actual = mystrtok(&cp, CHARS_SPACE);
313 } else if (strcmp(tp->action, "mystrtokq") == 0) {
314 actual = mystrtokq(&cp, CHARS_SPACE, CHARS_BRACE);
315 } else if (strcmp(tp->action, "mystrtokdq") == 0) {
316 actual = mystrtokdq(&cp, CHARS_SPACE);
317 } else if (strcmp(tp->action, "mystrtok_cw") == 0) {
318 actual = mystrtok_cw(&cp, CHARS_SPACE, "test");
319 } else if (strcmp(tp->action, "mystrtokq_cw") == 0) {
320 actual = mystrtokq_cw(&cp, CHARS_SPACE, CHARS_BRACE, "test");
321 } else if (strcmp(tp->action, "mystrtokdq_cw") == 0) {
322 actual = mystrtokdq_cw(&cp, CHARS_SPACE, "test");
323 } else {
324 msg_panic("invalid command: %s", tp->action);
325 }
326 if ((match = (actual && tp->expected[n]) ?
327 (strcmp(actual, tp->expected[n]) == 0) :
328 (actual == tp->expected[n])) != 0) {
329 if (actual == 0) {
330 msg_info("PASS test %ld", (long) (tp - testcases));
331 pass++;
332 break;
333 }
334 } else {
335 msg_warn("expected: >%s<, got: >%s<",
336 STR_OR_NULL(tp->expected[n]), STR_OR_NULL(actual));
337 msg_info("FAIL test %ld", (long) (tp - testcases));
338 fail++;
339 break;
340 }
341 }
342 if (n >= EXPECT_SIZE)
343 msg_panic("need to increase EXPECT_SIZE");
344 myfree(saved_input);
345 }
346 return (fail > 0);
347 }
348
349 #endif
350