xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/mystrtok.c (revision 3587d6f89c746bbb4f886219ddacd41ace480ecf)
1 /*	$NetBSD: mystrtok.c,v 1.3 2022/10/08 16:12:50 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	mystrtok 3
6 /* SUMMARY
7 /*	safe tokenizer
8 /* SYNOPSIS
9 /*	#include <stringops.h>
10 /*
11 /*	char	*mystrtok(bufp, delimiters)
12 /*	char	**bufp;
13 /*	const char *delimiters;
14 /*
15 /*	char	*mystrtokq(bufp, delimiters, parens)
16 /*	char	**bufp;
17 /*	const char *delimiters;
18 /*	const char *parens;
19 /*
20 /*	char	*mystrtokdq(bufp, delimiters)
21 /*	char	**bufp;
22 /*	const char *delimiters;
23 /* DESCRIPTION
24 /*	mystrtok() splits a buffer on the specified \fIdelimiters\fR.
25 /*	Tokens are delimited by runs of delimiters, so this routine
26 /*	cannot return zero-length tokens.
27 /*
28 /*	mystrtokq() is like mystrtok() but will not split text
29 /*	between balanced parentheses.  \fIparens\fR specifies the
30 /*	opening and closing parenthesis (one of each).  The set of
31 /*	\fIparens\fR must be distinct from the set of \fIdelimiters\fR.
32 /*
33 /*	mystrtokdq() is like mystrtok() but will not split text
34 /*	between double quotes. The backslash character may be used
35 /*	to escape characters. The double quote and backslash
36 /*	character must not appear in the set of \fIdelimiters\fR.
37 /*
38 /*	The \fIbufp\fR argument specifies the start of the search; it
39 /*	is updated with each call. The input is destroyed.
40 /*
41 /*	The result value is the next token, or a null pointer when the
42 /*	end of the buffer was reached.
43 /* LICENSE
44 /* .ad
45 /* .fi
46 /*	The Secure Mailer license must be distributed with this software.
47 /* AUTHOR(S)
48 /*	Wietse Venema
49 /*	IBM T.J. Watson Research
50 /*	P.O. Box 704
51 /*	Yorktown Heights, NY 10598, USA
52 /*
53 /*	Wietse Venema
54 /*	Google, Inc.
55 /*	111 8th Avenue
56 /*	New York, NY 10011, USA
57 /*--*/
58 
59 /* System library. */
60 
61 #include "sys_defs.h"
62 #include <string.h>
63 
64 /* Utility library. */
65 
66 #include "stringops.h"
67 
/* mystrtok - safe tokenizer */

 /*
  * Return the next token in the buffer that *src points to, or a null
  * pointer when only delimiters (or nothing) remain. The token is terminated
  * in place by overwriting the delimiter that follows it, and *src is
  * advanced so that the next call resumes after that delimiter.
  */
char   *mystrtok(char **src, const char *sep)
{
    char   *token;
    size_t  token_len;

    /*
     * A token starts at the first character that is not a delimiter. If
     * there is no such character, the buffer is exhausted.
     */
    token = *src + strspn(*src, sep);
    if (*token == 0) {
	*src = token;
	return (0);
    }

    /*
     * The token runs up to the next delimiter or the end of the buffer. Only
     * in the former case is there a character to overwrite and step over.
     */
    token_len = strcspn(token, sep);
    if (token[token_len] == 0) {
	*src = token + token_len;
    } else {
	token[token_len] = 0;
	*src = token + token_len + 1;
    }
    return (token);
}
93 
/* mystrtokq - safe tokenizer with quoting support */

 /*
  * Return the next token in the buffer that *src points to, or a null
  * pointer when only delimiters (or nothing) remain. Delimiters that appear
  * between an opening parenthesis (parens[0]) and its matching closing
  * parenthesis (parens[1]) do not end the token. The token is terminated in
  * place and *src is advanced past it.
  * 
  * A closing parenthesis seen while no parenthesis is open is treated as an
  * ordinary character. When the input ends while parentheses are still
  * open, the remainder of the buffer is returned as one token.
  */
char   *mystrtokq(char **src, const char *sep, const char *parens)
{
    char   *start = *src;
    char   *cp;				/* automatic: keeps this reentrant */
    int     ch;
    int     level;

    /*
     * Skip over leading delimiters.
     */
    start += strspn(start, sep);
    if (*start == 0) {
	*src = start;
	return (0);
    }

    /*
     * Parse out the next token. The nesting level counts currently open
     * parentheses; a delimiter ends the token only at nesting level zero.
     */
    for (level = 0, cp = start; (ch = *(unsigned char *) cp) != 0; cp++) {
	if (ch == parens[0]) {
	    level++;
	} else if (level > 0 && ch == parens[1]) {
	    level--;
	} else if (level == 0 && strchr(sep, ch) != 0) {
	    /* Terminate the token and resume after the delimiter. */
	    *cp++ = 0;
	    break;
	}
    }
    *src = cp;
    return (start);
}
128 
/* mystrtokdq - safe tokenizer, double quote and backslash support */

 /*
  * Return the next token in the buffer that *src points to, or a null
  * pointer when only delimiters (or nothing) remain. A delimiter does not
  * end the token while inside double quotes, or when it directly follows a
  * backslash. Quotes and backslashes are left in the token. The token is
  * terminated in place and *src is advanced past it.
  */
char   *mystrtokdq(char **src, const char *sep)
{
    char   *token;
    char   *scan;
    int     quoted = 0;

    /*
     * Find the first character that is not a delimiter.
     */
    token = *src + strspn(*src, sep);
    if (*token == 0) {
	*src = token;
	return (0);
    }

    /*
     * Walk forward to the next delimiter that is neither quoted nor escaped.
     */
    scan = token;
    while (*scan != 0) {
	if (*scan == '\\') {
	    /* Step over the escaped character, unless input ends here. */
	    scan++;
	    if (*scan == 0)
		break;
	} else if (*scan == '"') {
	    quoted = !quoted;
	} else if (quoted == 0 && strchr(sep, *(unsigned char *) scan) != 0) {
	    /* Terminate the token and resume after the delimiter. */
	    *scan = 0;
	    *src = scan + 1;
	    return (token);
	}
	scan++;
    }

    /*
     * End of input: the token extends to the end of the buffer.
     */
    *src = scan;
    return (token);
}
164 
165 #ifdef TEST
166 
167  /*
168   * Test program.
169   */
170 #include "msg.h"
171 #include "mymalloc.h"
172 
173  /*
174   * The following needs to be large enough to include a null terminator in
175   * every testcase.expected field.
176   */
177 #define EXPECT_SIZE	5
178 
179 struct testcase {
180     const char *action;
181     const char *input;
182     const char *expected[EXPECT_SIZE];
183 };
184 static const struct testcase testcases[] = {
185     {"mystrtok", ""},
186     {"mystrtok", "  foo  ", {"foo"}},
187     {"mystrtok", "  foo  bar  ", {"foo", "bar"}},
188     {"mystrtokq", ""},
189     {"mystrtokq", "foo bar", {"foo", "bar"}},
190     {"mystrtokq", "{ bar }  ", {"{ bar }"}},
191     {"mystrtokq", "foo { bar } baz", {"foo", "{ bar }", "baz"}},
192     {"mystrtokq", "foo{ bar } baz", {"foo{ bar }", "baz"}},
193     {"mystrtokq", "foo { bar }baz", {"foo", "{ bar }baz"}},
194     {"mystrtokdq", ""},
195     {"mystrtokdq", "  foo  ", {"foo"}},
196     {"mystrtokdq", "  foo  bar  ", {"foo", "bar"}},
197     {"mystrtokdq", "  foo\\ bar  ", {"foo\\ bar"}},
198     {"mystrtokdq", "  foo \\\" bar", {"foo", "\\\"", "bar"}},
199     {"mystrtokdq", "  foo \" bar baz\"  ", {"foo", "\" bar baz\""}},
200 };
201 
202 int     main(void)
203 {
204     const struct testcase *tp;
205     char   *actual;
206     int     pass;
207     int     fail;
208     int     match;
209     int     n;
210 
211 #define NUM_TESTS       sizeof(testcases)/sizeof(testcases[0])
212 #define STR_OR_NULL(s)	((s) ? (s) : "null")
213 
214     for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) {
215 	char   *saved_input = mystrdup(tp->input);
216 	char   *cp = saved_input;
217 
218 	msg_info("RUN test case %ld %s >%s<",
219 		 (long) (tp - testcases), tp->action, tp->input);
220 #if 0
221 	msg_info("action=%s", tp->action);
222 	msg_info("input=%s", tp->input);
223 	for (n = 0; tp->expected[n]; tp++)
224 	    msg_info("expected[%d]=%s", n, tp->expected[n]);
225 #endif
226 
227 	for (n = 0; n < EXPECT_SIZE; n++) {
228 	    if (strcmp(tp->action, "mystrtok") == 0) {
229 		actual = mystrtok(&cp, CHARS_SPACE);
230 	    } else if (strcmp(tp->action, "mystrtokq") == 0) {
231 		actual = mystrtokq(&cp, CHARS_SPACE, CHARS_BRACE);
232 	    } else if (strcmp(tp->action, "mystrtokdq") == 0) {
233 		actual = mystrtokdq(&cp, CHARS_SPACE);
234 	    } else {
235 		msg_panic("invalid command: %s", tp->action);
236 	    }
237 	    if ((match = (actual && tp->expected[n]) ?
238 		 (strcmp(actual, tp->expected[n]) == 0) :
239 		 (actual == tp->expected[n])) != 0) {
240 		if (actual == 0) {
241 		    msg_info("PASS test %ld", (long) (tp - testcases));
242 		    pass++;
243 		    break;
244 		}
245 	    } else {
246 		msg_warn("expected: >%s<, got: >%s<",
247 			 STR_OR_NULL(tp->expected[n]), STR_OR_NULL(actual));
248 		msg_info("FAIL test %ld", (long) (tp - testcases));
249 		fail++;
250 		break;
251 	    }
252 	}
253 	if (n >= EXPECT_SIZE)
254 	    msg_panic("need to increase EXPECT_SIZE");
255 	myfree(saved_input);
256     }
257     return (fail > 0);
258 }
259 
260 #endif
261