xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/mystrtok.c (revision c48c605c14fd8622b523d1d6a3f0c0bad133ea89)
1 /*	$NetBSD: mystrtok.c,v 1.4 2023/12/23 20:30:46 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	mystrtok 3
6 /* SUMMARY
7 /*	safe tokenizer
8 /* SYNOPSIS
9 /*	#include <stringops.h>
10 /*
11 /*	char	*mystrtok(bufp, delimiters)
12 /*	char	**bufp;
13 /*	const char *delimiters;
14 /*
15 /*	char	*mystrtokq(bufp, delimiters, parens)
16 /*	char	**bufp;
17 /*	const char *delimiters;
18 /*	const char *parens;
19 /*
20 /*	char	*mystrtokdq(bufp, delimiters)
21 /*	char	**bufp;
22 /*	const char *delimiters;
23 /*
24 /*	char	*mystrtok_cw(bufp, delimiters, blame)
25 /*	char	**bufp;
26 /*	const char *delimiters;
27 /*	const char *blame;
28 /*
29 /*	char	*mystrtokq_cw(bufp, delimiters, parens, blame)
30 /*	char	**bufp;
31 /*	const char *delimiters;
32 /*	const char *parens;
33 /*	const char *blame;
34 /*
35 /*	char	*mystrtokdq_cw(bufp, delimiters, blame)
36 /*	char	**bufp;
37 /*	const char *delimiters;
38 /*	const char *blame;
39 /* DESCRIPTION
40 /*	mystrtok() splits a buffer on the specified \fIdelimiters\fR.
41 /*	Tokens are delimited by runs of delimiters, so this routine
42 /*	cannot return zero-length tokens.
43 /*
44 /*	mystrtokq() is like mystrtok() but will not split text
45 /*	between balanced parentheses.  \fIparens\fR specifies the
46 /*	opening and closing parenthesis (one of each).  The set of
47 /*	\fIparens\fR must be distinct from the set of \fIdelimiters\fR.
48 /*
49 /*	mystrtokdq() is like mystrtok() but will not split text
50 /*	between double quotes. The backslash character may be used
51 /*	to escape characters. The double quote and backslash
52 /*	character must not appear in the set of \fIdelimiters\fR.
53 /*
54 /*	The \fIbufp\fR argument specifies the start of the search; it
55 /*	is updated with each call. The input is destroyed.
56 /*
57 /*	The result value is the next token, or a null pointer when the
58 /*	end of the buffer was reached.
59 /*
60 /*	mystrtok_cw(), mystrtokq_cw(), and mystrtokdq_cw() log a
61 /*	warning and return null when the result would look like
62 /*	a comment. The \fBblame\fR argument provides context for
63 /*	warning messages. Specify a null pointer to disable the
64 /*	comment check.
65 /* LICENSE
66 /* .ad
67 /* .fi
68 /*	The Secure Mailer license must be distributed with this software.
69 /* AUTHOR(S)
70 /*	Wietse Venema
71 /*	IBM T.J. Watson Research
72 /*	P.O. Box 704
73 /*	Yorktown Heights, NY 10598, USA
74 /*
75 /*	Wietse Venema
76 /*	Google, Inc.
77 /*	111 8th Avenue
78 /*	New York, NY 10011, USA
79 /*--*/
80 
81 /* System library. */
82 
83 #include <sys_defs.h>
84 #include <string.h>
85 
86 /* Utility library. */
87 
88 #include <msg.h>
89 #include <stringops.h>
90 
/* mystrtok_warn - log a warning about a token that starts with '#' */

static void mystrtok_warn(const char *start, const char *bufp, const char *blame)
{

    /*
     * The message shows the caller-supplied context, the rejected token in
     * full, and no more than the first 20 characters of the text that
     * follows the token.
     */
    msg_warn("%s: #comment after other text is not allowed: %s %.20s...",
	     blame,
	     start,
	     bufp);
}
98 
/* mystrtok - ABI compatibility wrapper around mystrtok_cw() */

 /*
  * NOTE(review): presumably <stringops.h> defines mystrtok as a macro; the
  * #undef makes sure a real function with this name is emitted - confirm
  * against the header.
  */
#undef mystrtok

char   *mystrtok(char **src, const char *sep)
{
    const char *no_blame = 0;

    /*
     * A null blame pointer disables the '#' check in mystrtok_cw().
     */
    return (mystrtok_cw(src, sep, no_blame));
}
107 
/* mystrtok_cw - safe tokenizer with optional comment warning */

char   *mystrtok_cw(char **src, const char *sep, const char *blame)
{
    char   *token;
    char   *rest;

    /*
     * Step over the delimiters in front of the token. When nothing but
     * delimiters is left, park the caller's pointer on the terminator and
     * report end of input.
     */
    token = *src + strspn(*src, sep);
    if (*token == 0) {
	*src = token;
	return (0);
    }

    /*
     * Terminate the token in place. The caller's pointer moves past the
     * delimiter that was overwritten, or stays on the string terminator
     * when the token ran up to the end of the buffer.
     */
    rest = token + strcspn(token, sep);
    if (*rest != 0) {
	*rest = 0;
	rest += 1;
    }
    *src = rest;

    /*
     * Without a blame string the token is returned unconditionally. With a
     * blame string, a token that starts with '#' is logged and withheld.
     */
    if (blame == 0 || *token != '#')
	return (token);
    mystrtok_warn(token, *src, blame);
    return (0);
}
139 
/* mystrtokq - ABI compatibility wrapper around mystrtokq_cw() */

 /*
  * NOTE(review): presumably <stringops.h> defines mystrtokq as a macro; the
  * #undef makes sure a real function with this name is emitted - confirm
  * against the header.
  */
#undef mystrtokq

char   *mystrtokq(char **src, const char *sep, const char *parens)
{
    const char *no_blame = 0;

    /*
     * A null blame pointer disables the '#' check in mystrtokq_cw().
     */
    return (mystrtokq_cw(src, sep, parens, no_blame));
}
148 
/* mystrtokq_cw - safe tokenizer that does not split inside parentheses */

 /*
  * Arguments: src points to the current parse position and is updated on
  * every call; sep is the set of delimiter characters; parens[0] and
  * parens[1] are the opening and closing parenthesis; blame, when not null,
  * enables the '#' check and supplies context for the warning.
  * 
  * Returns the next token (terminated in place inside the caller's buffer), or
  * null when only delimiters remain, or when blame is set and the token
  * starts with '#'.
  */
char   *mystrtokq_cw(char **src, const char *sep, const char *parens,
		             const char *blame)
{
    char   *start = *src;
    char   *cp;				/* automatic: no state is kept
					 * between calls */
    int     ch;
    int     level;

    /*
     * Skip over leading delimiters.
     */
    start += strspn(start, sep);
    if (*start == 0) {
	*src = start;
	return (0);
    }

    /*
     * Parse out the next token. "level" is the current parenthesis nesting
     * depth; a delimiter ends the token only at depth zero. A closing
     * parenthesis at depth zero is treated as ordinary text, and an opening
     * parenthesis that is never closed extends the token to the end of the
     * buffer.
     */
    for (level = 0, cp = start; (ch = *(unsigned char *) cp) != 0; cp++) {
	if (ch == parens[0]) {
	    level++;
	} else if (level > 0 && ch == parens[1]) {
	    level--;
	} else if (level == 0 && strchr(sep, ch) != 0) {
	    /* Terminate the token and resume after the delimiter. */
	    *cp++ = 0;
	    break;
	}
    }
    *src = cp;

    /*
     * With a blame string, a token that starts with '#' is logged and
     * withheld from the caller.
     */
    if (blame && *start == '#') {
	mystrtok_warn(start, *src, blame);
	return (0);
    } else {
	return (start);
    }
}
190 
/* mystrtokdq - ABI compatibility wrapper around mystrtokdq_cw() */

 /*
  * NOTE(review): presumably <stringops.h> defines mystrtokdq as a macro; the
  * #undef makes sure a real function with this name is emitted - confirm
  * against the header.
  */
#undef mystrtokdq

char   *mystrtokdq(char **src, const char *sep)
{
    const char *no_blame = 0;

    /*
     * A null blame pointer disables the '#' check in mystrtokdq_cw().
     */
    return (mystrtokdq_cw(src, sep, no_blame));
}
199 
/* mystrtokdq_cw - safe tokenizer, double quote and backslash support */

char   *mystrtokdq_cw(char **src, const char *sep, const char *blame)
{
    char   *scan = *src + strspn(*src, sep);	/* past leading delimiters */
    char   *token;
    int     quoted = 0;

    /*
     * Nothing but delimiters left: park the caller's pointer on the string
     * terminator and report end of input.
     */
    if (*scan == 0) {
	*src = scan;
	return (0);
    }

    /*
     * Walk to the first delimiter that is neither inside double quotes nor
     * preceded by a backslash. Quotes and backslashes stay in the token.
     */
    token = scan;
    while (*scan != 0) {
	if (*scan == '\\') {
	    /* Skip the escaped character; stop on a trailing backslash. */
	    scan++;
	    if (*scan == 0)
		break;
	} else if (*scan == '"') {
	    quoted = !quoted;
	} else if (!quoted && strchr(sep, *(unsigned char *) scan) != 0) {
	    /* Terminate the token and resume after the delimiter. */
	    *scan++ = 0;
	    break;
	}
	scan++;
    }
    *src = scan;

    /*
     * With a blame string, a token that starts with '#' is logged and
     * withheld from the caller.
     */
    if (blame != 0 && *token == '#') {
	mystrtok_warn(token, *src, blame);
	return (0);
    }
    return (token);
}
241 
242 #ifdef TEST
243 
244  /*
245   * Test program.
246   */
247 #include "msg.h"
248 #include "mymalloc.h"
249 
250  /*
251   * The following needs to be large enough to include a null terminator in
252   * every testcase.expected field.
253   */
#define EXPECT_SIZE	5

/* testcase - one table-driven test: which tokenizer, its input, its output */

struct testcase {
    const char *action;			/* name of the tokenizer to call */
    const char *input;			/* text to be tokenized */
    const char *expected[EXPECT_SIZE];	/* expected tokens, then null */
};

 /*
  * Entries that list no expected tokens leave the whole expected[] array
  * null, meaning that the very first tokenizer call must return null. Every
  * expected[] initializer is explicitly braced.
  */
static const struct testcase testcases[] = {
    {"mystrtok", ""},
    {"mystrtok", "  foo  ", {"foo"}},
    {"mystrtok", "  foo  bar  ", {"foo", "bar"}},
    {"mystrtokq", ""},
    {"mystrtokq", "foo bar", {"foo", "bar"}},
    {"mystrtokq", "{ bar }  ", {"{ bar }"}},
    {"mystrtokq", "foo { bar } baz", {"foo", "{ bar }", "baz"}},
    {"mystrtokq", "foo{ bar } baz", {"foo{ bar }", "baz"}},
    {"mystrtokq", "foo { bar }baz", {"foo", "{ bar }baz"}},
    {"mystrtokdq", ""},
    {"mystrtokdq", "  foo  ", {"foo"}},
    {"mystrtokdq", "  foo  bar  ", {"foo", "bar"}},
    {"mystrtokdq", "  foo\\ bar  ", {"foo\\ bar"}},
    {"mystrtokdq", "  foo \\\" bar", {"foo", "\\\"", "bar"}},
    {"mystrtokdq", "  foo \" bar baz\"  ", {"foo", "\" bar baz\""}},
    {"mystrtok_cw", "#after text"},
    {"mystrtok_cw", "before-text #after text", {"before-text"}},
    {"mystrtokq_cw", "#after text"},
    {"mystrtokq_cw", "{ before text } #after text", {"{ before text }"}},
    {"mystrtokdq_cw", "#after text"},
    {"mystrtokdq_cw", "\"before text\" #after text", {"\"before text\""}},
};
284 
main(void)285 int     main(void)
286 {
287     const struct testcase *tp;
288     char   *actual;
289     int     pass;
290     int     fail;
291     int     match;
292     int     n;
293 
294 #define NUM_TESTS       sizeof(testcases)/sizeof(testcases[0])
295 #define STR_OR_NULL(s)	((s) ? (s) : "null")
296 
297     for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) {
298 	char   *saved_input = mystrdup(tp->input);
299 	char   *cp = saved_input;
300 
301 	msg_info("RUN test case %ld %s >%s<",
302 		 (long) (tp - testcases), tp->action, tp->input);
303 #if 0
304 	msg_info("action=%s", tp->action);
305 	msg_info("input=%s", tp->input);
306 	for (n = 0; tp->expected[n]; tp++)
307 	    msg_info("expected[%d]=%s", n, tp->expected[n]);
308 #endif
309 
310 	for (n = 0; n < EXPECT_SIZE; n++) {
311 	    if (strcmp(tp->action, "mystrtok") == 0) {
312 		actual = mystrtok(&cp, CHARS_SPACE);
313 	    } else if (strcmp(tp->action, "mystrtokq") == 0) {
314 		actual = mystrtokq(&cp, CHARS_SPACE, CHARS_BRACE);
315 	    } else if (strcmp(tp->action, "mystrtokdq") == 0) {
316 		actual = mystrtokdq(&cp, CHARS_SPACE);
317 	    } else if (strcmp(tp->action, "mystrtok_cw") == 0) {
318 		actual = mystrtok_cw(&cp, CHARS_SPACE, "test");
319 	    } else if (strcmp(tp->action, "mystrtokq_cw") == 0) {
320 		actual = mystrtokq_cw(&cp, CHARS_SPACE, CHARS_BRACE, "test");
321 	    } else if (strcmp(tp->action, "mystrtokdq_cw") == 0) {
322 		actual = mystrtokdq_cw(&cp, CHARS_SPACE, "test");
323 	    } else {
324 		msg_panic("invalid command: %s", tp->action);
325 	    }
326 	    if ((match = (actual && tp->expected[n]) ?
327 		 (strcmp(actual, tp->expected[n]) == 0) :
328 		 (actual == tp->expected[n])) != 0) {
329 		if (actual == 0) {
330 		    msg_info("PASS test %ld", (long) (tp - testcases));
331 		    pass++;
332 		    break;
333 		}
334 	    } else {
335 		msg_warn("expected: >%s<, got: >%s<",
336 			 STR_OR_NULL(tp->expected[n]), STR_OR_NULL(actual));
337 		msg_info("FAIL test %ld", (long) (tp - testcases));
338 		fail++;
339 		break;
340 	    }
341 	}
342 	if (n >= EXPECT_SIZE)
343 	    msg_panic("need to increase EXPECT_SIZE");
344 	myfree(saved_input);
345     }
346     return (fail > 0);
347 }
348 
349 #endif
350