xref: /openbsd-src/regress/lib/libc/regex/main.c (revision 47911bd667ac77dc523b8a13ef40b012dbffa741)
1 /*	$OpenBSD: main.c,v 1.3 1997/01/15 23:41:07 millert Exp $	*/
2 /*	$NetBSD: main.c,v 1.2 1995/04/20 22:39:51 cgd Exp $	*/
3 
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <sys/types.h>
8 #include <regex.h>
9 #include <assert.h>
10 #include <unistd.h>
11 
12 #include "main.ih"
13 
char *progname;			/* argv[0]; used in the usage message */
int debug = 0;			/* bumped by -x; regress() echoes line numbers when set */
int line = 0;			/* number of the regression-script line being processed */
int status = 0;			/* process exit status; set to 1 when a check fails */

int copts = REG_EXTENDED;	/* base regcomp() flags; replaced by -c */
int eopts = 0;			/* base regexec() flags; replaced by -e */
regoff_t startoff = 0;		/* -S: rm_so handed to regexec() under REG_STARTEND */
regoff_t endoff = 0;		/* -E: subtracted from strlen() to form rm_eo */


/*
 * Not defined in this file.  The prototypes are inferred from the calls
 * made below -- NOTE(review): confirm against the actual definitions.
 */
extern int split(char *string, char *fields[], int nfields, char *sep);
extern void regprint(regex_t *r, FILE *d);
27 
28 /*
29  - main - do the simple case, hand off to regress() for regression
30  */
31 int
32 main(argc, argv)
33 int argc;
34 char *argv[];
35 {
36 	regex_t re;
37 #	define	NS	10
38 	regmatch_t subs[NS];
39 	char erbuf[100];
40 	int err;
41 	size_t len;
42 	int c;
43 	int errflg = 0;
44 	register int i;
45 	extern int optind;
46 	extern char *optarg;
47 
48 	progname = argv[0];
49 
50 	while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
51 		switch (c) {
52 		case 'c':	/* compile options */
53 			copts = options('c', optarg);
54 			break;
55 		case 'e':	/* execute options */
56 			eopts = options('e', optarg);
57 			break;
58 		case 'S':	/* start offset */
59 			startoff = (regoff_t)atoi(optarg);
60 			break;
61 		case 'E':	/* end offset */
62 			endoff = (regoff_t)atoi(optarg);
63 			break;
64 		case 'x':	/* Debugging. */
65 			debug++;
66 			break;
67 		case '?':
68 		default:
69 			errflg++;
70 			break;
71 		}
72 	if (errflg) {
73 		fprintf(stderr, "usage: %s ", progname);
74 		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
75 		exit(2);
76 	}
77 
78 	if (optind >= argc) {
79 		regress(stdin);
80 		exit(status);
81 	}
82 
83 	err = regcomp(&re, argv[optind++], copts);
84 	if (err) {
85 		len = regerror(err, &re, erbuf, sizeof(erbuf));
86 		fprintf(stderr, "error %s, %d/%d `%s'\n",
87 			eprint(err), len, sizeof(erbuf), erbuf);
88 		exit(status);
89 	}
90 	regprint(&re, stdout);
91 
92 	if (optind >= argc) {
93 		regfree(&re);
94 		exit(status);
95 	}
96 
97 	if (eopts&REG_STARTEND) {
98 		subs[0].rm_so = startoff;
99 		subs[0].rm_eo = strlen(argv[optind]) - endoff;
100 	}
101 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
102 	if (err) {
103 		len = regerror(err, &re, erbuf, sizeof(erbuf));
104 		fprintf(stderr, "error %s, %d/%d `%s'\n",
105 			eprint(err), len, sizeof(erbuf), erbuf);
106 		exit(status);
107 	}
108 	if (!(copts&REG_NOSUB)) {
109 		len = (size_t)(subs[0].rm_eo - subs[0].rm_so);
110 		if (subs[0].rm_so != -1) {
111 			if (len != 0)
112 				printf("match `%.*s'\n", (int)len,
113 					argv[optind] + subs[0].rm_so);
114 			else
115 				printf("match `'@%.1s\n",
116 					argv[optind] + subs[0].rm_so);
117 		}
118 		for (i = 1; i < NS; i++)
119 			if (subs[i].rm_so != -1)
120 				printf("(%d) `%.*s'\n", i,
121 					(int)(subs[i].rm_eo - subs[i].rm_so),
122 					argv[optind] + subs[i].rm_so);
123 	}
124 	exit(status);
125 }
126 
127 /*
128  - regress - main loop of regression test
129  == void regress(FILE *in);
130  */
131 void
132 regress(in)
133 FILE *in;
134 {
135 	char inbuf[1000];
136 #	define	MAXF	10
137 	char *f[MAXF];
138 	int nf;
139 	int i;
140 	char erbuf[100];
141 	size_t ne;
142 	char *badpat = "invalid regular expression";
143 #	define	SHORT	10
144 	char *bpname = "REG_BADPAT";
145 	regex_t re;
146 
147 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
148 		line++;
149 		if (inbuf[0] == '#' || inbuf[0] == '\n')
150 			continue;			/* NOTE CONTINUE */
151 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
152 		if (debug)
153 			fprintf(stdout, "%d:\n", line);
154 		nf = split(inbuf, f, MAXF, "\t\t");
155 		if (nf < 3) {
156 			fprintf(stderr, "bad input, line %d\n", line);
157 			exit(1);
158 		}
159 		for (i = 0; i < nf; i++)
160 			if (strcmp(f[i], "\"\"") == 0)
161 				f[i] = "";
162 		if (nf <= 3)
163 			f[3] = NULL;
164 		if (nf <= 4)
165 			f[4] = NULL;
166 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
167 		if (opt('&', f[1]))	/* try with either type of RE */
168 			try(f[0], f[1], f[2], f[3], f[4],
169 					options('c', f[1]) &~ REG_EXTENDED);
170 	}
171 
172 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
173 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
174 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
175 							erbuf, badpat);
176 		status = 1;
177 	}
178 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
179 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
180 						ne != strlen(badpat)+1) {
181 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
182 						erbuf, SHORT-1, badpat);
183 		status = 1;
184 	}
185 	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
186 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
187 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
188 						erbuf, bpname);
189 		status = 1;
190 	}
191 	re.re_endp = bpname;
192 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
193 	if (atoi(erbuf) != (int)REG_BADPAT) {
194 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
195 						erbuf, (long)REG_BADPAT);
196 		status = 1;
197 	} else if (ne != strlen(erbuf)+1) {
198 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
199 						erbuf, (long)REG_BADPAT);
200 		status = 1;
201 	}
202 }
203 
204 /*
205  - try - try it, and report on problems
206  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
207  */
208 void
209 try(f0, f1, f2, f3, f4, opts)
210 char *f0;
211 char *f1;
212 char *f2;
213 char *f3;
214 char *f4;
215 int opts;			/* may not match f1 */
216 {
217 	regex_t re;
218 #	define	NSUBS	10
219 	regmatch_t subs[NSUBS];
220 #	define	NSHOULD	15
221 	char *should[NSHOULD];
222 	int nshould;
223 	char erbuf[100];
224 	int err;
225 	int len;
226 	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
227 	register int i;
228 	char *grump;
229 	char f0copy[1000];
230 	char f2copy[1000];
231 
232 	strcpy(f0copy, f0);
233 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
234 	fixstr(f0copy);
235 	err = regcomp(&re, f0copy, opts);
236 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
237 		/* unexpected error or wrong error */
238 		len = regerror(err, &re, erbuf, sizeof(erbuf));
239 		fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
240 					line, type, eprint(err), len,
241 					sizeof(erbuf), erbuf);
242 		status = 1;
243 	} else if (err == 0 && opt('C', f1)) {
244 		/* unexpected success */
245 		fprintf(stderr, "%d: %s should have given REG_%s\n",
246 						line, type, f2);
247 		status = 1;
248 		err = 1;	/* so we won't try regexec */
249 	}
250 
251 	if (err != 0) {
252 		regfree(&re);
253 		return;
254 	}
255 
256 	strcpy(f2copy, f2);
257 	fixstr(f2copy);
258 
259 	if (options('e', f1)&REG_STARTEND) {
260 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
261 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
262 		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
263 		subs[0].rm_eo = strchr(f2, ')') - f2;
264 	}
265 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
266 
267 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
268 		/* unexpected error or wrong error */
269 		len = regerror(err, &re, erbuf, sizeof(erbuf));
270 		fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
271 					line, type, eprint(err), len,
272 					sizeof(erbuf), erbuf);
273 		status = 1;
274 	} else if (err != 0) {
275 		/* nothing more to check */
276 	} else if (f3 == NULL) {
277 		/* unexpected success */
278 		fprintf(stderr, "%d: %s exec should have failed\n",
279 						line, type);
280 		status = 1;
281 		err = 1;		/* just on principle */
282 	} else if (opts&REG_NOSUB) {
283 		/* nothing more to check */
284 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
285 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
286 		status = 1;
287 		err = 1;
288 	}
289 
290 	if (err != 0 || f4 == NULL) {
291 		regfree(&re);
292 		return;
293 	}
294 
295 	for (i = 1; i < NSHOULD; i++)
296 		should[i] = NULL;
297 	nshould = split(f4, should+1, NSHOULD-1, ",");
298 	if (nshould == 0) {
299 		nshould = 1;
300 		should[1] = "";
301 	}
302 	for (i = 1; i < NSUBS; i++) {
303 		grump = check(f2, subs[i], should[i]);
304 		if (grump != NULL) {
305 			fprintf(stderr, "%d: %s $%d %s\n", line,
306 							type, i, grump);
307 			status = 1;
308 			err = 1;
309 		}
310 	}
311 
312 	regfree(&re);
313 }
314 
315 /*
316  - options - pick options out of a regression-test string
317  == int options(int type, char *s);
318  */
319 int
320 options(type, s)
321 int type;			/* 'c' compile, 'e' exec */
322 char *s;
323 {
324 	register char *p;
325 	register int o = (type == 'c') ? copts : eopts;
326 	register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
327 
328 	for (p = s; *p != '\0'; p++)
329 		if (strchr(legal, *p) != NULL)
330 			switch (*p) {
331 			case 'b':
332 				o &= ~REG_EXTENDED;
333 				break;
334 			case 'i':
335 				o |= REG_ICASE;
336 				break;
337 			case 's':
338 				o |= REG_NOSUB;
339 				break;
340 			case 'n':
341 				o |= REG_NEWLINE;
342 				break;
343 			case 'm':
344 				o &= ~REG_EXTENDED;
345 				o |= REG_NOSPEC;
346 				break;
347 			case 'p':
348 				o |= REG_PEND;
349 				break;
350 			case '^':
351 				o |= REG_NOTBOL;
352 				break;
353 			case '$':
354 				o |= REG_NOTEOL;
355 				break;
356 			case '#':
357 				o |= REG_STARTEND;
358 				break;
359 			case 't':	/* trace */
360 				o |= REG_TRACE;
361 				break;
362 			case 'l':	/* force long representation */
363 				o |= REG_LARGE;
364 				break;
365 			case 'r':	/* force backref use */
366 				o |= REG_BACKR;
367 				break;
368 			}
369 	return(o);
370 }
371 
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 when character `c' occurs in the option string `s', else 0.
 * The lookup is a plain strchr(), so c == '\0' finds the terminator and
 * also yields 1.
 */
int				/* predicate */
opt(int c, char *s)
{
	return(strchr(s, c) != NULL);
}
383 
/*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 *
 * Rewrites the string in place: 'N' becomes newline, 'T' tab, 'S' space
 * and 'Z' a NUL byte.  The scan stops at the first NUL that was already
 * in the input; a NUL produced from 'Z' does not stop it, because the
 * pointer has moved on before the next test.  A NULL argument is ignored.
 */
void
fixstr(char *p)
{
	if (p == NULL)
		return;

	for (; *p != '\0'; p++) {
		switch (*p) {
		case 'N':
			*p = '\n';
			break;
		case 'T':
			*p = '\t';
			break;
		case 'S':
			*p = ' ';
			break;
		case 'Z':
			*p = '\0';
			break;
		default:
			break;
		}
	}
}
405 
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * str     the string that was matched against
 * sub     the offsets reported for one (sub)match
 * should  the expected text; NULL or "-" means "must not have matched",
 *         "@xyz" means "empty match located just before xyz" (a bare "@"
 *         means an empty match at the end of the string)
 *
 * Returns NULL when `sub' agrees with `should', otherwise a complaint.
 * The complaint is a string literal or a static buffer that the next
 * call overwrites.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range (rm_eo is known to be non-negative by now) */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof(grump),
					"start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/*
	 * check for not supposed to match; this has to come before
	 * strlen(should), which must never see a NULL pointer
	 */
	if (should == NULL) {
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
								len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
482 
483 /*
484  - eprint - convert error number to name
485  == static char *eprint(int err);
486  */
487 static char *
488 eprint(err)
489 int err;
490 {
491 	static char epbuf[100];
492 	size_t len;
493 
494 	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
495 	assert(len <= sizeof(epbuf));
496 	return(epbuf);
497 }
498 
499 /*
500  - efind - convert error name to number
501  == static int efind(char *name);
502  */
503 static int
504 efind(name)
505 char *name;
506 {
507 	static char efbuf[100];
508 	regex_t re;
509 
510 	sprintf(efbuf, "REG_%s", name);
511 	assert(strlen(efbuf) < sizeof(efbuf));
512 	re.re_endp = efbuf;
513 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
514 	return(atoi(efbuf));
515 }
516