xref: /netbsd-src/tests/lib/libc/regex/main.c (revision 8621598a2680b3b4666324df3054a2ad264d069c)
1 /*	$NetBSD: main.c,v 1.4 2021/02/23 17:13:44 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1993 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <assert.h>
30 #include <regex.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35 
36 #include <sys/types.h>
37 
38 #include "test_regex.h"
39 
40 char *progname;		/* argv[0]; assigned at the top of main() */
41 int debug = 0;		/* bumped once per -x option */
42 int line = 0;		/* count of regression input lines read so far */
43 int status = 0;		/* process exit status; set to 1 when a check fails */
44 
45 int copts = REG_EXTENDED;	/* base regcomp() flags; adjusted by -c */
46 int eopts = 0;		/* base regexec() flags; adjusted by -e */
47 regoff_t startoff = 0;	/* -S value: rm_so used with REG_STARTEND */
48 regoff_t endoff = 0;	/* -E value: subtracted from the string length to form rm_eo */
49 
50 static char empty = '\0';	/* shared empty string substituted for "" fields */
51 
52 static char *eprint(int);	/* error number -> error name */
53 static int efind(char *);	/* error name -> error number */
54 
/*
 * Fallbacks for <regex.h> implementations that lack these extension flags.
 * Only REG_ATOI is probed; when it is missing, all seven names are defined
 * as 0, so OR-ing them into a flag word has no effect and `#if REG_ATOI'
 * sections are compiled out.
 */
55 #ifndef REG_ATOI
56 #define REG_ATOI 0
57 #define REG_ITOA 0
58 #define REG_PEND 0
59 #define REG_TRACE 0
60 #define REG_BACKR 0
61 #define REG_NOSPEC 0
62 #define REG_LARGE 0
63 #endif
64 
65 /*
66  * main - do the simple case, hand off to regress() for regression
67  */
68 int
main(int argc,char * argv[])69 main(int argc, char *argv[])
70 {
71 	regex_t re;
72 #	define	NS	10
73 	regmatch_t subs[NS];
74 	char erbuf[100];
75 	int err;
76 	size_t len;
77 	int c;
78 	int errflg = 0;
79 	int i;
80 	extern int optind;
81 	extern char *optarg;
82 
83 	progname = argv[0];
84 
85 	while ((c = getopt(argc, argv, "c:E:e:S:x")) != -1)
86 		switch (c) {
87 		case 'c':	/* compile options */
88 			copts = options('c', optarg);
89 			break;
90 		case 'e':	/* execute options */
91 			eopts = options('e', optarg);
92 			break;
93 		case 'E':	/* end offset */
94 			endoff = (regoff_t)atoi(optarg);
95 			break;
96 		case 'S':	/* start offset */
97 			startoff = (regoff_t)atoi(optarg);
98 			break;
99 		case 'x':	/* Debugging. */
100 			debug++;
101 			break;
102 		case '?':
103 		default:
104 			errflg++;
105 			break;
106 		}
107 	if (errflg) {
108 		fprintf(stderr, "usage: %s ", progname);
109 		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
110 		exit(2);
111 	}
112 
113 	if (optind >= argc) {
114 		regress(stdin);
115 		exit(status);
116 	}
117 
118 	err = regcomp(&re, argv[optind++], copts);
119 	if (err) {
120 		len = regerror(err, &re, erbuf, sizeof(erbuf));
121 		fprintf(stderr, "error %s, %zd/%zd `%s'\n",
122 			eprint(err), len, (size_t)sizeof(erbuf), erbuf);
123 		exit(status);
124 	}
125 	regprint(&re, stdout);
126 
127 	if (optind >= argc) {
128 		regfree(&re);
129 		exit(status);
130 	}
131 
132 	if (eopts&REG_STARTEND) {
133 		subs[0].rm_so = startoff;
134 		subs[0].rm_eo = strlen(argv[optind]) - endoff;
135 	}
136 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
137 	if (err) {
138 		len = regerror(err, &re, erbuf, sizeof(erbuf));
139 		fprintf(stderr, "error %s, %zd/%zd `%s'\n",
140 			eprint(err), len, (size_t)sizeof(erbuf), erbuf);
141 		exit(status);
142 	}
143 	if (!(copts&REG_NOSUB)) {
144 		len = (int)(subs[0].rm_eo - subs[0].rm_so);
145 		if (subs[0].rm_so != -1) {
146 			if (len != 0)
147 				printf("match `%.*s'\n", (int)len,
148 					argv[optind] + subs[0].rm_so);
149 			else
150 				printf("match `'@%.1s\n",
151 					argv[optind] + subs[0].rm_so);
152 		}
153 		for (i = 1; i < NS; i++)
154 			if (subs[i].rm_so != -1)
155 				printf("(%d) `%.*s'\n", i,
156 					(int)(subs[i].rm_eo - subs[i].rm_so),
157 					argv[optind] + subs[i].rm_so);
158 	}
159 	exit(status);
160 }
161 
162 /*
163  * regress - main loop of regression test
164  */
165 void
regress(FILE * in)166 regress(FILE *in)
167 {
168 	char inbuf[1000];
169 #	define	MAXF	10
170 	char *f[MAXF];
171 	int nf;
172 	int i;
173 	char erbuf[100];
174 	size_t ne;
175 	const char *badpat = "invalid regular expression";
176 #	define	SHORT	10
177 	const char *bpname = "REG_BADPAT";
178 	regex_t re;
179 
180 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
181 		line++;
182 		if (inbuf[0] == '#' || inbuf[0] == '\n')
183 			continue;			/* NOTE CONTINUE */
184 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
185 		if (debug)
186 			fprintf(stdout, "%d:\n", line);
187 		nf = split(inbuf, f, MAXF, "\t\t");
188 		if (nf < 3) {
189 			fprintf(stderr, "bad input, line %d\n", line);
190 			exit(1);
191 		}
192 		for (i = 0; i < nf; i++)
193 			if (strcmp(f[i], "\"\"") == 0)
194 				f[i] = &empty;
195 		if (nf <= 3)
196 			f[3] = NULL;
197 		if (nf <= 4)
198 			f[4] = NULL;
199 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
200 		if (opt('&', f[1]))	/* try with either type of RE */
201 			try(f[0], f[1], f[2], f[3], f[4],
202 					options('c', f[1]) &~ REG_EXTENDED);
203 	}
204 
205 	ne = regerror(REG_BADPAT, NULL, erbuf, sizeof(erbuf));
206 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
207 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
208 							erbuf, badpat);
209 		status = 1;
210 	}
211 	ne = regerror(REG_BADPAT, NULL, erbuf, (size_t)SHORT);
212 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
213 						ne != strlen(badpat)+1) {
214 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
215 						erbuf, SHORT-1, badpat);
216 		status = 1;
217 	}
218 	ne = regerror(REG_ITOA|REG_BADPAT, NULL, erbuf, sizeof(erbuf));
219 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
220 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
221 						erbuf, bpname);
222 		status = 1;
223 	}
224 #if REG_ATOI
225 	re.re_endp = bpname;
226 #endif
227 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
228 	if (atoi(erbuf) != (int)REG_BADPAT) {
229 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
230 						erbuf, (long)REG_BADPAT);
231 		status = 1;
232 	} else if (ne != strlen(erbuf)+1) {
233 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
234 						erbuf, (long)REG_BADPAT);
235 		status = 1;
236 	}
237 }
238 
239 /*
240  - try - try it, and report on problems
241  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
242  */
243 void
try(char * f0,char * f1,char * f2,char * f3,char * f4,int opts)244 try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts)
245 {
246 	regex_t re;
247 #	define	NSUBS	10
248 	regmatch_t subs[NSUBS];
249 #	define	NSHOULD	15
250 	char *should[NSHOULD];
251 	int nshould;
252 	char erbuf[100];
253 	int err;
254 	int len;
255 	const char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
256 	int i;
257 	char *grump;
258 	char f0copy[1000];
259 	char f2copy[1000];
260 
261 	strcpy(f0copy, f0);
262 #if REG_ATOI
263 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
264 #endif
265 	fixstr(f0copy);
266 	err = regcomp(&re, f0copy, opts);
267 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
268 		/* unexpected error or wrong error */
269 		len = regerror(err, &re, erbuf, sizeof(erbuf));
270 		fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
271 					line, type, eprint(err), len,
272 					(int)sizeof(erbuf), erbuf);
273 		status = 1;
274 	} else if (err == 0 && opt('C', f1)) {
275 		/* unexpected success */
276 		fprintf(stderr, "%d: %s should have given REG_%s\n",
277 						line, type, f2);
278 		status = 1;
279 		err = 1;	/* so we won't try regexec */
280 	}
281 
282 	if (err != 0) {
283 		regfree(&re);
284 		return;
285 	}
286 
287 	strcpy(f2copy, f2);
288 	fixstr(f2copy);
289 
290 	if (options('e', f1)&REG_STARTEND) {
291 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
292 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
293 		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
294 		subs[0].rm_eo = strchr(f2, ')') - f2;
295 	}
296 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
297 
298 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
299 		/* unexpected error or wrong error */
300 		len = regerror(err, &re, erbuf, sizeof(erbuf));
301 		fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
302 					line, type, eprint(err), len,
303 					(int)sizeof(erbuf), erbuf);
304 		status = 1;
305 	} else if (err != 0) {
306 		/* nothing more to check */
307 	} else if (f3 == NULL) {
308 		/* unexpected success */
309 		fprintf(stderr, "%d: %s exec should have failed\n",
310 						line, type);
311 		status = 1;
312 		err = 1;		/* just on principle */
313 	} else if (opts&REG_NOSUB) {
314 		/* nothing more to check */
315 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
316 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
317 		status = 1;
318 		err = 1;
319 	}
320 
321 	if (err != 0 || f4 == NULL) {
322 		regfree(&re);
323 		return;
324 	}
325 
326 	for (i = 1; i < NSHOULD; i++)
327 		should[i] = NULL;
328 	nshould = split(f4, &should[1], NSHOULD-1, ",");
329 	if (nshould == 0) {
330 		nshould = 1;
331 		should[1] = &empty;
332 	}
333 	for (i = 1; i < NSUBS; i++) {
334 		grump = check(f2, subs[i], should[i]);
335 		if (grump != NULL) {
336 			fprintf(stderr, "%d: %s $%d %s\n", line,
337 							type, i, grump);
338 			status = 1;
339 			err = 1;
340 		}
341 	}
342 
343 	regfree(&re);
344 }
345 
346 /*
347  - options - pick options out of a regression-test string
348  == int options(int type, char *s);
349  */
350 int
options(int type,char * s)351 options(int type, char *s)
352 {
353 	char *p;
354 	int o = (type == 'c') ? copts : eopts;
355 	const char *legal = (type == 'c') ? "bisnmpg" : "^$#tl";
356 
357 	for (p = s; *p != '\0'; p++)
358 		if (strchr(legal, *p) != NULL)
359 			switch (*p) {
360 			case 'b':
361 				o &= ~REG_EXTENDED;
362 				break;
363 			case 'i':
364 				o |= REG_ICASE;
365 				break;
366 			case 's':
367 				o |= REG_NOSUB;
368 				break;
369 			case 'n':
370 				o |= REG_NEWLINE;
371 				break;
372 			case 'm':
373 				o &= ~REG_EXTENDED;
374 				o |= REG_NOSPEC;
375 				break;
376 			case 'p':
377 				o |= REG_PEND;
378 				break;
379 			case 'g':
380 				o |= REG_GNU;
381 				break;
382 			case '^':
383 				o |= REG_NOTBOL;
384 				break;
385 			case '$':
386 				o |= REG_NOTEOL;
387 				break;
388 			case '#':
389 				o |= REG_STARTEND;
390 				break;
391 			case 't':	/* trace */
392 				o |= REG_TRACE;
393 				break;
394 			case 'l':	/* force long representation */
395 				o |= REG_LARGE;
396 				break;
397 			case 'r':	/* force backref use */
398 				o |= REG_BACKR;
399 				break;
400 			}
401 	return(o);
402 }
403 
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 when strchr() finds character `c' in `s', 0 otherwise.
 */
int				/* predicate */
opt(int c, char *s)
{
	const char *hit = strchr(s, c);

	if (hit == NULL)
		return 0;
	return 1;
}
413 
/*
 - fixstr - transform magic characters in strings
 == void fixstr(char *p);
 *
 * Rewrites the string in place: 'N' becomes newline, 'T' becomes tab,
 * 'S' becomes space and 'Z' becomes NUL.  The scan ends at the first NUL
 * that was already present in the input; a NUL produced from 'Z' does not
 * end it.  A NULL argument is accepted and ignored.
 */
void
fixstr(char *p)
{
	char *cur;

	if (p == NULL)
		return;

	for (cur = p; *cur != '\0'; cur++) {
		switch (*cur) {
		case 'N':
			*cur = '\n';
			break;
		case 'T':
			*cur = '\t';
			break;
		case 'S':
			*cur = ' ';
			break;
		case 'Z':
			*cur = '\0';
			break;
		default:
			break;
		}
	}
}
434 
/*
 * check - check a substring match
 *
 * str		the subject string the offsets in `sub' refer to
 * sub		the match offsets reported for one (sub)expression
 * should	what the match is expected to be:
 *		  NULL or "-"	no match expected
 *		  "@text"	an empty match expected, located where the
 *				subject continues with `text' ("@" alone
 *				means at the end of the string)
 *		  anything else	the exact text expected to be matched
 *
 * Returns NULL when the result is as expected, otherwise a pointer to a
 * static buffer holding a complaint; the buffer is overwritten by the
 * next call.  Complaints are formatted with snprintf() so that a long
 * match is truncated rather than written past the end of the buffer.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	static char nothing[] = "";	/* writable empty string, not a literal */
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = nothing;
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1) {
		if (should == NULL)
			return(NULL);
		snprintf(grump, sizeof(grump), "did not match");
		return(grump);
	}

	/* check for in range */
	if (sub.rm_eo > (ssize_t)strlen(str)) {
		snprintf(grump, sizeof(grump),
					"start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/* check for not supposed to match */
	if (should == NULL) {
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}

	/* check for wrong match */
	shlen = (int)strlen(should);
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
					len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
511 
512 /*
513  * eprint - convert error number to name
514  */
515 static char *
eprint(int err)516 eprint(int err)
517 {
518 	static char epbuf[100];
519 	size_t len;
520 
521 	len = regerror(REG_ITOA|err, NULL, epbuf, sizeof(epbuf));
522 	assert(len <= sizeof(epbuf));
523 	return(epbuf);
524 }
525 
526 /*
527  * efind - convert error name to number
528  */
529 static int
efind(char * name)530 efind(char *name)
531 {
532 	static char efbuf[100];
533 	regex_t re;
534 
535 	sprintf(efbuf, "REG_%s", name);
536 	assert(strlen(efbuf) < sizeof(efbuf));
537 #if REG_ATOI
538 	re.re_endp = efbuf;
539 #endif
540 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
541 	return(atoi(efbuf));
542 }
543