1 /* $NetBSD: main.c,v 1.2 2011/09/16 16:13:18 plunky Exp $ */
2
3 /*-
4 * Copyright (c) 1993 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <assert.h>
30 #include <regex.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #include <sys/types.h>
37
38 #include "test_regex.h"
39
/* Name the program was invoked under (argv[0]); used in the usage message. */
char *progname;
/* Verbosity; bumped by each -x and makes regress() echo input line numbers. */
int debug = 0;
/* Number of the regression-input line currently being processed. */
int line = 0;
/* Process exit status; set to 1 by any failed check. */
int status = 0;

/* Baseline regcomp() flags that options('c', ...) starts from. */
int copts = REG_EXTENDED;
/* Baseline regexec() flags that options('e', ...) starts from. */
int eopts = 0;
/* Offsets given with -S / -E, used by main() when REG_STARTEND is set. */
regoff_t startoff = 0;
regoff_t endoff = 0;

/* A one-byte empty string, substituted for fields that stand for "". */
static char empty = '\0';

/* File-local helpers translating between regex error codes and names. */
static char *eprint(int);
static int efind(char *);
54
55 /*
56 * main - do the simple case, hand off to regress() for regression
57 */
58 int
main(int argc,char * argv[])59 main(int argc, char *argv[])
60 {
61 regex_t re;
62 # define NS 10
63 regmatch_t subs[NS];
64 char erbuf[100];
65 int err;
66 size_t len;
67 int c;
68 int errflg = 0;
69 int i;
70 extern int optind;
71 extern char *optarg;
72
73 progname = argv[0];
74
75 while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
76 switch (c) {
77 case 'c': /* compile options */
78 copts = options('c', optarg);
79 break;
80 case 'e': /* execute options */
81 eopts = options('e', optarg);
82 break;
83 case 'S': /* start offset */
84 startoff = (regoff_t)atoi(optarg);
85 break;
86 case 'E': /* end offset */
87 endoff = (regoff_t)atoi(optarg);
88 break;
89 case 'x': /* Debugging. */
90 debug++;
91 break;
92 case '?':
93 default:
94 errflg++;
95 break;
96 }
97 if (errflg) {
98 fprintf(stderr, "usage: %s ", progname);
99 fprintf(stderr, "[-c copt][-C][-d] [re]\n");
100 exit(2);
101 }
102
103 if (optind >= argc) {
104 regress(stdin);
105 exit(status);
106 }
107
108 err = regcomp(&re, argv[optind++], copts);
109 if (err) {
110 len = regerror(err, &re, erbuf, sizeof(erbuf));
111 fprintf(stderr, "error %s, %zd/%zd `%s'\n",
112 eprint(err), len, (size_t)sizeof(erbuf), erbuf);
113 exit(status);
114 }
115 regprint(&re, stdout);
116
117 if (optind >= argc) {
118 regfree(&re);
119 exit(status);
120 }
121
122 if (eopts®_STARTEND) {
123 subs[0].rm_so = startoff;
124 subs[0].rm_eo = strlen(argv[optind]) - endoff;
125 }
126 err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
127 if (err) {
128 len = regerror(err, &re, erbuf, sizeof(erbuf));
129 fprintf(stderr, "error %s, %zd/%zd `%s'\n",
130 eprint(err), len, (size_t)sizeof(erbuf), erbuf);
131 exit(status);
132 }
133 if (!(copts®_NOSUB)) {
134 len = (int)(subs[0].rm_eo - subs[0].rm_so);
135 if (subs[0].rm_so != -1) {
136 if (len != 0)
137 printf("match `%.*s'\n", (int)len,
138 argv[optind] + subs[0].rm_so);
139 else
140 printf("match `'@%.1s\n",
141 argv[optind] + subs[0].rm_so);
142 }
143 for (i = 1; i < NS; i++)
144 if (subs[i].rm_so != -1)
145 printf("(%d) `%.*s'\n", i,
146 (int)(subs[i].rm_eo - subs[i].rm_so),
147 argv[optind] + subs[i].rm_so);
148 }
149 exit(status);
150 }
151
152 /*
153 * regress - main loop of regression test
154 */
155 void
regress(FILE * in)156 regress(FILE *in)
157 {
158 char inbuf[1000];
159 # define MAXF 10
160 char *f[MAXF];
161 int nf;
162 int i;
163 char erbuf[100];
164 size_t ne;
165 const char *badpat = "invalid regular expression";
166 # define SHORT 10
167 const char *bpname = "REG_BADPAT";
168 regex_t re;
169
170 while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
171 line++;
172 if (inbuf[0] == '#' || inbuf[0] == '\n')
173 continue; /* NOTE CONTINUE */
174 inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
175 if (debug)
176 fprintf(stdout, "%d:\n", line);
177 nf = split(inbuf, f, MAXF, "\t\t");
178 if (nf < 3) {
179 fprintf(stderr, "bad input, line %d\n", line);
180 exit(1);
181 }
182 for (i = 0; i < nf; i++)
183 if (strcmp(f[i], "\"\"") == 0)
184 f[i] = ∅
185 if (nf <= 3)
186 f[3] = NULL;
187 if (nf <= 4)
188 f[4] = NULL;
189 try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
190 if (opt('&', f[1])) /* try with either type of RE */
191 try(f[0], f[1], f[2], f[3], f[4],
192 options('c', f[1]) &~ REG_EXTENDED);
193 }
194
195 ne = regerror(REG_BADPAT, NULL, erbuf, sizeof(erbuf));
196 if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
197 fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
198 erbuf, badpat);
199 status = 1;
200 }
201 ne = regerror(REG_BADPAT, NULL, erbuf, (size_t)SHORT);
202 if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
203 ne != strlen(badpat)+1) {
204 fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
205 erbuf, SHORT-1, badpat);
206 status = 1;
207 }
208 ne = regerror(REG_ITOA|REG_BADPAT, NULL, erbuf, sizeof(erbuf));
209 if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
210 fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
211 erbuf, bpname);
212 status = 1;
213 }
214 re.re_endp = bpname;
215 ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
216 if (atoi(erbuf) != (int)REG_BADPAT) {
217 fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
218 erbuf, (long)REG_BADPAT);
219 status = 1;
220 } else if (ne != strlen(erbuf)+1) {
221 fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
222 erbuf, (long)REG_BADPAT);
223 status = 1;
224 }
225 }
226
227 /*
228 - try - try it, and report on problems
229 == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
230 */
231 void
try(char * f0,char * f1,char * f2,char * f3,char * f4,int opts)232 try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts)
233 {
234 regex_t re;
235 # define NSUBS 10
236 regmatch_t subs[NSUBS];
237 # define NSHOULD 15
238 char *should[NSHOULD];
239 int nshould;
240 char erbuf[100];
241 int err;
242 int len;
243 const char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
244 int i;
245 char *grump;
246 char f0copy[1000];
247 char f2copy[1000];
248
249 strcpy(f0copy, f0);
250 re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL;
251 fixstr(f0copy);
252 err = regcomp(&re, f0copy, opts);
253 if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
254 /* unexpected error or wrong error */
255 len = regerror(err, &re, erbuf, sizeof(erbuf));
256 fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
257 line, type, eprint(err), len,
258 (int)sizeof(erbuf), erbuf);
259 status = 1;
260 } else if (err == 0 && opt('C', f1)) {
261 /* unexpected success */
262 fprintf(stderr, "%d: %s should have given REG_%s\n",
263 line, type, f2);
264 status = 1;
265 err = 1; /* so we won't try regexec */
266 }
267
268 if (err != 0) {
269 regfree(&re);
270 return;
271 }
272
273 strcpy(f2copy, f2);
274 fixstr(f2copy);
275
276 if (options('e', f1)®_STARTEND) {
277 if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
278 fprintf(stderr, "%d: bad STARTEND syntax\n", line);
279 subs[0].rm_so = strchr(f2, '(') - f2 + 1;
280 subs[0].rm_eo = strchr(f2, ')') - f2;
281 }
282 err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
283
284 if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
285 /* unexpected error or wrong error */
286 len = regerror(err, &re, erbuf, sizeof(erbuf));
287 fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
288 line, type, eprint(err), len,
289 (int)sizeof(erbuf), erbuf);
290 status = 1;
291 } else if (err != 0) {
292 /* nothing more to check */
293 } else if (f3 == NULL) {
294 /* unexpected success */
295 fprintf(stderr, "%d: %s exec should have failed\n",
296 line, type);
297 status = 1;
298 err = 1; /* just on principle */
299 } else if (opts®_NOSUB) {
300 /* nothing more to check */
301 } else if ((grump = check(f2, subs[0], f3)) != NULL) {
302 fprintf(stderr, "%d: %s %s\n", line, type, grump);
303 status = 1;
304 err = 1;
305 }
306
307 if (err != 0 || f4 == NULL) {
308 regfree(&re);
309 return;
310 }
311
312 for (i = 1; i < NSHOULD; i++)
313 should[i] = NULL;
314 nshould = split(f4, &should[1], NSHOULD-1, ",");
315 if (nshould == 0) {
316 nshould = 1;
317 should[1] = ∅
318 }
319 for (i = 1; i < NSUBS; i++) {
320 grump = check(f2, subs[i], should[i]);
321 if (grump != NULL) {
322 fprintf(stderr, "%d: %s $%d %s\n", line,
323 type, i, grump);
324 status = 1;
325 err = 1;
326 }
327 }
328
329 regfree(&re);
330 }
331
332 /*
333 - options - pick options out of a regression-test string
334 == int options(int type, char *s);
335 */
336 int
options(int type,char * s)337 options(int type, char *s)
338 {
339 char *p;
340 int o = (type == 'c') ? copts : eopts;
341 const char *legal = (type == 'c') ? "bisnmpP" : "^$#tl";
342
343 for (p = s; *p != '\0'; p++)
344 if (strchr(legal, *p) != NULL)
345 switch (*p) {
346 case 'b':
347 o &= ~REG_EXTENDED;
348 break;
349 case 'i':
350 o |= REG_ICASE;
351 break;
352 case 's':
353 o |= REG_NOSUB;
354 break;
355 case 'n':
356 o |= REG_NEWLINE;
357 break;
358 case 'm':
359 o &= ~REG_EXTENDED;
360 o |= REG_NOSPEC;
361 break;
362 case 'p':
363 o |= REG_PEND;
364 break;
365 case 'P':
366 o |= REG_POSIX;
367 break;
368 case '^':
369 o |= REG_NOTBOL;
370 break;
371 case '$':
372 o |= REG_NOTEOL;
373 break;
374 case '#':
375 o |= REG_STARTEND;
376 break;
377 case 't': /* trace */
378 o |= REG_TRACE;
379 break;
380 case 'l': /* force long representation */
381 o |= REG_LARGE;
382 break;
383 case 'r': /* force backref use */
384 o |= REG_BACKR;
385 break;
386 }
387 return(o);
388 }
389
/*
 * opt - is a particular option in a regression string?
 *
 * Returns 1 when character `c' occurs in `s' and 0 otherwise.  The
 * lookup is a plain strchr(), so asking for '\0' always succeeds.
 */
int				/* predicate */
opt(int c, char *s)
{
	return strchr(s, c) == NULL ? 0 : 1;
}
399
/*
 * fixstr - transform magic characters in strings
 *
 * Rewrites `p' in place: 'N' becomes newline, 'T' tab, 'S' space and
 * 'Z' a NUL byte.  The scan steps over a NUL it has just written and
 * stops only at a NUL that was already there.  A NULL `p' is ignored.
 */
void
fixstr(char *p)
{
	char *cp;

	if (p == NULL)
		return;

	for (cp = p; *cp != '\0'; cp++) {
		switch (*cp) {
		case 'N':
			*cp = '\n';
			break;
		case 'T':
			*cp = '\t';
			break;
		case 'S':
			*cp = ' ';
			break;
		case 'Z':
			*cp = '\0';
			break;
		default:
			break;
		}
	}
}
420
/*
 * check - check a substring match
 *
 * str		the subject string the offsets in `sub' refer to
 * sub		the match offsets to verify
 * should	the expected matched text; NULL or "-" means "must not
 *		match"; "@text" means "must be an empty match located
 *		where `text' begins" (plain "@" means end of string)
 *
 * Returns NULL if the match is as expected, otherwise a complaint held
 * in a static buffer that the next call overwrites.  Complaints are
 * formatted with snprintf(), so a long matched substring is truncated
 * rather than overrunning the buffer.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	static char grump[500];
	static char nothing[] = "";
	int len;
	int shlen;
	char *p;
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = nothing;
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
	    (sub.rm_so != -1 && sub.rm_eo == -1) ||
	    (sub.rm_so != -1 && sub.rm_so < 0) ||
	    (sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1) {
		if (should == NULL)
			return(NULL);
		snprintf(grump, sizeof(grump), "did not match");
		return(grump);
	}

	/* check for in range; rm_eo is known to be non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof(grump),
		    "start %ld end %ld, past end of string",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/* check for not supposed to match */
	if (should == NULL) {
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}

	/* check for wrong match */
	shlen = (int)strlen(should);
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
		    len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
497
498 /*
499 * eprint - convert error number to name
500 */
501 static char *
eprint(int err)502 eprint(int err)
503 {
504 static char epbuf[100];
505 size_t len;
506
507 len = regerror(REG_ITOA|err, NULL, epbuf, sizeof(epbuf));
508 assert(len <= sizeof(epbuf));
509 return(epbuf);
510 }
511
512 /*
513 * efind - convert error name to number
514 */
515 static int
efind(char * name)516 efind(char *name)
517 {
518 static char efbuf[100];
519 regex_t re;
520
521 sprintf(efbuf, "REG_%s", name);
522 assert(strlen(efbuf) < sizeof(efbuf));
523 re.re_endp = efbuf;
524 (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
525 return(atoi(efbuf));
526 }
527