/* $NetBSD: split.c,v 1.1 2011/01/08 18:10:31 pgoyette Exp $ */

/*-
 * Copyright (c) 1993 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "test_regex.h"

/*
 * split - divide a string into fields, like awk split()
 *
 * string	text to split; it is modified in place: the separator that
 *		ends each stored field is overwritten with '\0'
 * fields	receives one pointer into string per field; the list is
 *		not NULL-terminated
 * nfields	number of entries available in fields[]
 * sep		"" white, "c" single char, "ab" [ab]+
 *
 * Separator forms:
 *   ""		fields are separated by runs of blanks and tabs; leading
 *		white space is skipped and trailing white space does not
 *		produce an empty final field
 *   "c"	every occurrence of c ends a field, so adjacent separators
 *		yield empty fields
 *   "ab..."	a run of any of the listed characters is one separator; a
 *		leading or trailing run still yields an empty first or last
 *		field
 *
 * Returns the number of fields, including overflow, i.e. fields that did
 * not fit in fields[].  An empty string (or, with sep "", one that is all
 * white space) yields 0 and stores nothing.  On overflow the last entry,
 * fields[nfields-1], points at the unsplit remainder of the string; with
 * sep "" trailing white space is trimmed from that remainder only when
 * the remainder is a single field.
 *
 * If nfields <= 0 the fields are only counted: nothing is stored through
 * fields and string is left unmodified.
 */
int
split(char *string, char *fields[], int nfields, const char *sep)
{
	char *p = string;
	char c;				/* latest character */
	char sepc = *sep;
	char sepc2;
	int fn;
	char **fp = fields;
	const char *sepp;
	int trimtrail;
	char *scratch[1];		/* stand-in vector for count-only calls */
	int countonly = 0;

	/*
	 * The one- and two-separator loops below always store the first
	 * field before looking at the remaining room.  With no room at
	 * all, point them at a private one-entry vector so the caller's
	 * array is never written; a one-entry vector never gets a field
	 * terminated, so the string stays intact apart from the trailing
	 * trim, which is suppressed explicitly below.
	 */
	if (nfields <= 0) {
		fp = scratch;
		nfields = 1;
		countonly = 1;
	}

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		fn = nfields;		/* fn counts the free slots left */
		for (;;) {
			*fp++ = p;
			fn--;
			if (fn == 0)
				break;	/* last slot keeps the remainder */
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(nfields - fn);
			*(p-1) = '\0';
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		fn = nfields;		/* fn counts the free slots left */
		for (;;) {
			*fp++ = p;
			fn--;
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0') {
					/*
					 * In white-space mode an empty last
					 * field only means the string ended
					 * in white space: do not count it.
					 */
					if (trimtrail && **(fp-1) == '\0')
						fn++;
					return(nfields - fn);
				}
			if (fn == 0)
				break;	/* last slot keeps the remainder */
			*(p-1) = '\0';
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;
			if (*p != '\0') {
				/*
				 * The count above included an empty field
				 * after the trailing white space.  If that
				 * was the only overflow, the last stored
				 * field is a single field: trim it.
				 */
				if (fn == nfields+1 && !countonly)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;				/* here fn counts fields found */
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}

#ifdef TEST_SPLIT


/* helpers defined further down in this file */
void dosplit(char *string, char *seps);
void regress(void);

/*
 * test program
 *	pgm		runs regression
 *	pgm sep		splits stdin lines by sep
 *	pgm str sep	splits str by sep
 *	pgm str sep n	splits str by sep n times
 *	pgm str sep n x	copies str n times without splitting it
 *			(presumably a baseline for timing the form above)
 *
 * Always returns 0.
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
#	define	MNF	10
	char *fields[MNF];

	if (argc > 4) {
		for (n = atoi(argv[3]); n > 0; n--) {
			/* bounded copy: an over-long str is truncated */
			(void) snprintf(buf, sizeof(buf), "%s", argv[1]);
		}
	} else if (argc > 3) {
		for (n = atoi(argv[3]); n > 0; n--) {
			/* split() writes into its input, so recopy each time */
			(void) snprintf(buf, sizeof(buf), "%s", argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	} else if (argc > 2) {
		dosplit(argv[1], argv[2]);
	} else if (argc > 1) {
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/*
			 * Stomp the newline if there is one.  A final line
			 * without a newline, or a line longer than buf,
			 * keeps its last character.
			 */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	} else {
		regress();
	}

	return 0;
}

/*
 * print - show the result of a split
 *
 * nf		field count as returned by split()
 * nfp		number of entries that were available in fields[]
 * fields	the field pointers stored by split()
 *
 * Writes the count, a tab, and then at most nfp quoted fields separated
 * by ", ".  The terminating newline is written after field number nf, so
 * no newline is produced when nf is 0 or when nf exceeds nfp.
 */
void
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	/* only the first nfp fields were actually stored */
	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n");
}

/*
 * dosplit - split string by seps into at most NF fields and print them
 *
 * string is modified in place by split().
 */
void
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *fields[NF];
	int nf;

	nf = split(string, fields, NF, seps);
	print(nf, NF, fields);
}

#define	RNF	5		/* some table entries know this */

/*
 * Regression table for split(), terminated by an entry whose str is NULL.
 * Every case is run with room for RNF fields, so an expected count above
 * RNF means the last listed field is the unsplit remainder.
 */
static const struct {
	const char *str;	/* string to split */
	const char *seps;	/* separator argument */
	int nf;			/* expected return value */
	const char *fi[RNF];	/* expected stored fields */
} tests[] = {
	/* single separator: adjacent separators give empty fields */
	{ "",			" ",	0,	{ "" } },
	{ " ",			" ",	2,	{ "", "" } },
	{ "x",			" ",	1,	{ "x" } },
	{ "xy",			" ",	1,	{ "xy" } },
	{ "x y",		" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",		" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" ",	6,	{ "", "a", "b", "c", "d " } },

	/* two separators */
	{ "",			" _",	0,	{ "" } },
	{ " ",			" _",	2,	{ "", "" } },
	{ "x",			" _",	1,	{ "x" } },
	{ "x y",		" _",	2,	{ "x", "y" } },
	{ "ab _ cd",		" _",	2,	{ "ab", "cd" } },
	{ " a_b c ",		" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" _",	6,	{ "", "a", "b", "c", "d " } },

	/* three separators */
	{ "",			" _~",	0,	{ "" } },
	{ " ",			" _~",	2,	{ "", "" } },
	{ "x",			" _~",	1,	{ "x" } },
	{ "x y",		" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",		" _~",	2,	{ "ab", "cd" } },
	{ " a_b c~",		" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",		" _~",	6,	{ "", "a", "b", "c", "d " } },

	/* four separators */
	{ "",			" _~-",	0,	{ "" } },
	{ " ",			" _~-",	2,	{ "", "" } },
	{ "x",			" _~-",	1,	{ "x" } },
	{ "x y",		" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",		" _~-",	2,	{ "ab", "cd" } },
	{ " a_b c~",		" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",		" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/* blank and tab given explicitly: no leading/trailing trimming */
	{ "",			" \t",	0,	{ "" } },
	{ " ",			" \t",	2,	{ "", "" } },
	{ "x",			" \t",	1,	{ "x" } },
	{ "xy",			" \t",	1,	{ "xy" } },
	{ "x y",		" \t",	2,	{ "x", "y" } },
	{ "abc def  g ",	" \t",	4,	{ "abc", "def", "g", "" } },
	{ "\t a bcd",		" \t",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	" \t",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" \t",	6,	{ "", "a", "b", "c", "d " } },

	/* empty separator: white space, trimmed at both ends */
	{ "",			"",	0,	{ "" } },
	{ " ",			"",	0,	{ "" } },
	{ "x",			"",	1,	{ "x" } },
	{ "xy",			"",	1,	{ "xy" } },
	{ "x y",		"",	2,	{ "x", "y" } },
	{ "abc def g ",		"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",		"",	2,	{ "a", "bcd" } },
	{ " a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",		"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	{ NULL,			NULL,	0,	{ NULL } },
};

304*11be35a1SLionel Sambuc void
regress(void)305*11be35a1SLionel Sambuc regress(void)
306*11be35a1SLionel Sambuc {
307*11be35a1SLionel Sambuc char buf[512];
308*11be35a1SLionel Sambuc int n;
309*11be35a1SLionel Sambuc char *fields[RNF+1];
310*11be35a1SLionel Sambuc int nf;
311*11be35a1SLionel Sambuc int i;
312*11be35a1SLionel Sambuc int printit;
313*11be35a1SLionel Sambuc char *f;
314*11be35a1SLionel Sambuc
315*11be35a1SLionel Sambuc for (n = 0; tests[n].str != NULL; n++) {
316*11be35a1SLionel Sambuc (void) strcpy(buf, tests[n].str);
317*11be35a1SLionel Sambuc fields[RNF] = NULL;
318*11be35a1SLionel Sambuc nf = split(buf, fields, RNF, tests[n].seps);
319*11be35a1SLionel Sambuc printit = 0;
320*11be35a1SLionel Sambuc if (nf != tests[n].nf) {
321*11be35a1SLionel Sambuc printf("split `%s' by `%s' gave %d fields, not %d\n",
322*11be35a1SLionel Sambuc tests[n].str, tests[n].seps, nf, tests[n].nf);
323*11be35a1SLionel Sambuc printit = 1;
324*11be35a1SLionel Sambuc } else if (fields[RNF] != NULL) {
325*11be35a1SLionel Sambuc printf("split() went beyond array end\n");
326*11be35a1SLionel Sambuc printit = 1;
327*11be35a1SLionel Sambuc } else {
328*11be35a1SLionel Sambuc for (i = 0; i < nf && i < RNF; i++) {
329*11be35a1SLionel Sambuc f = fields[i];
330*11be35a1SLionel Sambuc if (f == NULL)
331*11be35a1SLionel Sambuc f = "(NULL)";
332*11be35a1SLionel Sambuc if (strcmp(f, tests[n].fi[i]) != 0) {
333*11be35a1SLionel Sambuc printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
334*11be35a1SLionel Sambuc tests[n].str, tests[n].seps,
335*11be35a1SLionel Sambuc i, fields[i], tests[n].fi[i]);
336*11be35a1SLionel Sambuc printit = 1;
337*11be35a1SLionel Sambuc }
338*11be35a1SLionel Sambuc }
339*11be35a1SLionel Sambuc }
340*11be35a1SLionel Sambuc if (printit)
341*11be35a1SLionel Sambuc print(nf, RNF, fields);
342*11be35a1SLionel Sambuc }
343*11be35a1SLionel Sambuc }
#endif /* TEST_SPLIT */