1 /* $NetBSD: main.c,v 1.4 2021/02/23 17:13:44 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1993 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <assert.h>
30 #include <regex.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #include <sys/types.h>
37
38 #include "test_regex.h"
39
/* Program name as given in argv[0]; used in the usage message. */
char *progname;
/* Debug level; bumped once for every -x on the command line. */
int debug = 0;
/* Number of the regression-script line currently being processed. */
int line = 0;
/* Exit status of the program; set to 1 when a regression check fails. */
int status = 0;

/* Baseline regcomp() flags; options('c', ...) starts from this value. */
int copts = REG_EXTENDED;
/* Baseline regexec() flags; options('e', ...) starts from this value. */
int eopts = 0;
/* Offsets applied to subs[0] when REG_STARTEND is used (-S and -E). */
regoff_t startoff = 0;
regoff_t endoff = 0;

/* Shared empty string substituted for a `""' field or an absent value. */
static char empty = '\0';

static char *eprint(int);
static int efind(char *);

/*
 * When <regex.h> does not provide REG_ATOI, define it and the other
 * extension flags listed here as 0 so that or'ing or and'ing them into
 * a flag word has no effect.
 */
#ifndef REG_ATOI
#define	REG_ATOI	0
#define	REG_ITOA	0
#define	REG_PEND	0
#define	REG_TRACE	0
#define	REG_BACKR	0
#define	REG_NOSPEC	0
#define	REG_LARGE	0
#endif
64
65 /*
66 * main - do the simple case, hand off to regress() for regression
67 */
68 int
main(int argc,char * argv[])69 main(int argc, char *argv[])
70 {
71 regex_t re;
72 # define NS 10
73 regmatch_t subs[NS];
74 char erbuf[100];
75 int err;
76 size_t len;
77 int c;
78 int errflg = 0;
79 int i;
80 extern int optind;
81 extern char *optarg;
82
83 progname = argv[0];
84
85 while ((c = getopt(argc, argv, "c:E:e:S:x")) != -1)
86 switch (c) {
87 case 'c': /* compile options */
88 copts = options('c', optarg);
89 break;
90 case 'e': /* execute options */
91 eopts = options('e', optarg);
92 break;
93 case 'E': /* end offset */
94 endoff = (regoff_t)atoi(optarg);
95 break;
96 case 'S': /* start offset */
97 startoff = (regoff_t)atoi(optarg);
98 break;
99 case 'x': /* Debugging. */
100 debug++;
101 break;
102 case '?':
103 default:
104 errflg++;
105 break;
106 }
107 if (errflg) {
108 fprintf(stderr, "usage: %s ", progname);
109 fprintf(stderr, "[-c copt][-C][-d] [re]\n");
110 exit(2);
111 }
112
113 if (optind >= argc) {
114 regress(stdin);
115 exit(status);
116 }
117
118 err = regcomp(&re, argv[optind++], copts);
119 if (err) {
120 len = regerror(err, &re, erbuf, sizeof(erbuf));
121 fprintf(stderr, "error %s, %zd/%zd `%s'\n",
122 eprint(err), len, (size_t)sizeof(erbuf), erbuf);
123 exit(status);
124 }
125 regprint(&re, stdout);
126
127 if (optind >= argc) {
128 regfree(&re);
129 exit(status);
130 }
131
132 if (eopts®_STARTEND) {
133 subs[0].rm_so = startoff;
134 subs[0].rm_eo = strlen(argv[optind]) - endoff;
135 }
136 err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
137 if (err) {
138 len = regerror(err, &re, erbuf, sizeof(erbuf));
139 fprintf(stderr, "error %s, %zd/%zd `%s'\n",
140 eprint(err), len, (size_t)sizeof(erbuf), erbuf);
141 exit(status);
142 }
143 if (!(copts®_NOSUB)) {
144 len = (int)(subs[0].rm_eo - subs[0].rm_so);
145 if (subs[0].rm_so != -1) {
146 if (len != 0)
147 printf("match `%.*s'\n", (int)len,
148 argv[optind] + subs[0].rm_so);
149 else
150 printf("match `'@%.1s\n",
151 argv[optind] + subs[0].rm_so);
152 }
153 for (i = 1; i < NS; i++)
154 if (subs[i].rm_so != -1)
155 printf("(%d) `%.*s'\n", i,
156 (int)(subs[i].rm_eo - subs[i].rm_so),
157 argv[optind] + subs[i].rm_so);
158 }
159 exit(status);
160 }
161
162 /*
163 * regress - main loop of regression test
164 */
165 void
regress(FILE * in)166 regress(FILE *in)
167 {
168 char inbuf[1000];
169 # define MAXF 10
170 char *f[MAXF];
171 int nf;
172 int i;
173 char erbuf[100];
174 size_t ne;
175 const char *badpat = "invalid regular expression";
176 # define SHORT 10
177 const char *bpname = "REG_BADPAT";
178 regex_t re;
179
180 while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
181 line++;
182 if (inbuf[0] == '#' || inbuf[0] == '\n')
183 continue; /* NOTE CONTINUE */
184 inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
185 if (debug)
186 fprintf(stdout, "%d:\n", line);
187 nf = split(inbuf, f, MAXF, "\t\t");
188 if (nf < 3) {
189 fprintf(stderr, "bad input, line %d\n", line);
190 exit(1);
191 }
192 for (i = 0; i < nf; i++)
193 if (strcmp(f[i], "\"\"") == 0)
194 f[i] = ∅
195 if (nf <= 3)
196 f[3] = NULL;
197 if (nf <= 4)
198 f[4] = NULL;
199 try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
200 if (opt('&', f[1])) /* try with either type of RE */
201 try(f[0], f[1], f[2], f[3], f[4],
202 options('c', f[1]) &~ REG_EXTENDED);
203 }
204
205 ne = regerror(REG_BADPAT, NULL, erbuf, sizeof(erbuf));
206 if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
207 fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
208 erbuf, badpat);
209 status = 1;
210 }
211 ne = regerror(REG_BADPAT, NULL, erbuf, (size_t)SHORT);
212 if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
213 ne != strlen(badpat)+1) {
214 fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
215 erbuf, SHORT-1, badpat);
216 status = 1;
217 }
218 ne = regerror(REG_ITOA|REG_BADPAT, NULL, erbuf, sizeof(erbuf));
219 if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
220 fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
221 erbuf, bpname);
222 status = 1;
223 }
224 #if REG_ATOI
225 re.re_endp = bpname;
226 #endif
227 ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
228 if (atoi(erbuf) != (int)REG_BADPAT) {
229 fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
230 erbuf, (long)REG_BADPAT);
231 status = 1;
232 } else if (ne != strlen(erbuf)+1) {
233 fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
234 erbuf, (long)REG_BADPAT);
235 status = 1;
236 }
237 }
238
239 /*
240 - try - try it, and report on problems
241 == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
242 */
243 void
try(char * f0,char * f1,char * f2,char * f3,char * f4,int opts)244 try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts)
245 {
246 regex_t re;
247 # define NSUBS 10
248 regmatch_t subs[NSUBS];
249 # define NSHOULD 15
250 char *should[NSHOULD];
251 int nshould;
252 char erbuf[100];
253 int err;
254 int len;
255 const char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
256 int i;
257 char *grump;
258 char f0copy[1000];
259 char f2copy[1000];
260
261 strcpy(f0copy, f0);
262 #if REG_ATOI
263 re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL;
264 #endif
265 fixstr(f0copy);
266 err = regcomp(&re, f0copy, opts);
267 if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
268 /* unexpected error or wrong error */
269 len = regerror(err, &re, erbuf, sizeof(erbuf));
270 fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
271 line, type, eprint(err), len,
272 (int)sizeof(erbuf), erbuf);
273 status = 1;
274 } else if (err == 0 && opt('C', f1)) {
275 /* unexpected success */
276 fprintf(stderr, "%d: %s should have given REG_%s\n",
277 line, type, f2);
278 status = 1;
279 err = 1; /* so we won't try regexec */
280 }
281
282 if (err != 0) {
283 regfree(&re);
284 return;
285 }
286
287 strcpy(f2copy, f2);
288 fixstr(f2copy);
289
290 if (options('e', f1)®_STARTEND) {
291 if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
292 fprintf(stderr, "%d: bad STARTEND syntax\n", line);
293 subs[0].rm_so = strchr(f2, '(') - f2 + 1;
294 subs[0].rm_eo = strchr(f2, ')') - f2;
295 }
296 err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
297
298 if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
299 /* unexpected error or wrong error */
300 len = regerror(err, &re, erbuf, sizeof(erbuf));
301 fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
302 line, type, eprint(err), len,
303 (int)sizeof(erbuf), erbuf);
304 status = 1;
305 } else if (err != 0) {
306 /* nothing more to check */
307 } else if (f3 == NULL) {
308 /* unexpected success */
309 fprintf(stderr, "%d: %s exec should have failed\n",
310 line, type);
311 status = 1;
312 err = 1; /* just on principle */
313 } else if (opts®_NOSUB) {
314 /* nothing more to check */
315 } else if ((grump = check(f2, subs[0], f3)) != NULL) {
316 fprintf(stderr, "%d: %s %s\n", line, type, grump);
317 status = 1;
318 err = 1;
319 }
320
321 if (err != 0 || f4 == NULL) {
322 regfree(&re);
323 return;
324 }
325
326 for (i = 1; i < NSHOULD; i++)
327 should[i] = NULL;
328 nshould = split(f4, &should[1], NSHOULD-1, ",");
329 if (nshould == 0) {
330 nshould = 1;
331 should[1] = ∅
332 }
333 for (i = 1; i < NSUBS; i++) {
334 grump = check(f2, subs[i], should[i]);
335 if (grump != NULL) {
336 fprintf(stderr, "%d: %s $%d %s\n", line,
337 type, i, grump);
338 status = 1;
339 err = 1;
340 }
341 }
342
343 regfree(&re);
344 }
345
346 /*
347 - options - pick options out of a regression-test string
348 == int options(int type, char *s);
349 */
350 int
options(int type,char * s)351 options(int type, char *s)
352 {
353 char *p;
354 int o = (type == 'c') ? copts : eopts;
355 const char *legal = (type == 'c') ? "bisnmpg" : "^$#tl";
356
357 for (p = s; *p != '\0'; p++)
358 if (strchr(legal, *p) != NULL)
359 switch (*p) {
360 case 'b':
361 o &= ~REG_EXTENDED;
362 break;
363 case 'i':
364 o |= REG_ICASE;
365 break;
366 case 's':
367 o |= REG_NOSUB;
368 break;
369 case 'n':
370 o |= REG_NEWLINE;
371 break;
372 case 'm':
373 o &= ~REG_EXTENDED;
374 o |= REG_NOSPEC;
375 break;
376 case 'p':
377 o |= REG_PEND;
378 break;
379 case 'g':
380 o |= REG_GNU;
381 break;
382 case '^':
383 o |= REG_NOTBOL;
384 break;
385 case '$':
386 o |= REG_NOTEOL;
387 break;
388 case '#':
389 o |= REG_STARTEND;
390 break;
391 case 't': /* trace */
392 o |= REG_TRACE;
393 break;
394 case 'l': /* force long representation */
395 o |= REG_LARGE;
396 break;
397 case 'r': /* force backref use */
398 o |= REG_BACKR;
399 break;
400 }
401 return(o);
402 }
403
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 when the character c occurs in s and 0 otherwise.
 */
int				/* predicate */
opt(int c, char *s)
{
	return(strchr(s, c) != NULL);
}
413
414 /*
415 - fixstr - transform magic characters in strings
416 == void fixstr(char *p);
417 */
418 void
fixstr(char * p)419 fixstr(char *p)
420 {
421 if (p == NULL)
422 return;
423
424 for (; *p != '\0'; p++)
425 if (*p == 'N')
426 *p = '\n';
427 else if (*p == 'T')
428 *p = '\t';
429 else if (*p == 'S')
430 *p = ' ';
431 else if (*p == 'Z')
432 *p = '\0';
433 }
434
435 /*
436 * check - check a substring match
437 */
438 char * /* NULL or complaint */
check(char * str,regmatch_t sub,char * should)439 check(char *str, regmatch_t sub, char *should)
440 {
441 int len;
442 int shlen;
443 char *p;
444 static char grump[500];
445 char *at = NULL;
446
447 if (should != NULL && strcmp(should, "-") == 0)
448 should = NULL;
449 if (should != NULL && should[0] == '@') {
450 at = should + 1;
451 should = ∅
452 }
453
454 /* check rm_so and rm_eo for consistency */
455 if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
456 (sub.rm_so != -1 && sub.rm_eo == -1) ||
457 (sub.rm_so != -1 && sub.rm_so < 0) ||
458 (sub.rm_eo != -1 && sub.rm_eo < 0) ) {
459 sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
460 (long)sub.rm_eo);
461 return(grump);
462 }
463
464 /* check for no match */
465 if (sub.rm_so == -1) {
466 if (should == NULL)
467 return(NULL);
468 else {
469 sprintf(grump, "did not match");
470 return(grump);
471 }
472 }
473
474 /* check for in range */
475 if (sub.rm_eo > (ssize_t)strlen(str)) {
476 sprintf(grump, "start %ld end %ld, past end of string",
477 (long)sub.rm_so, (long)sub.rm_eo);
478 return(grump);
479 }
480
481 len = (int)(sub.rm_eo - sub.rm_so);
482 p = str + sub.rm_so;
483
484 /* check for not supposed to match */
485 if (should == NULL) {
486 sprintf(grump, "matched `%.*s'", len, p);
487 return(grump);
488 }
489
490 /* check for wrong match */
491 shlen = (int)strlen(should);
492 if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
493 sprintf(grump, "matched `%.*s' instead", len, p);
494 return(grump);
495 }
496 if (shlen > 0)
497 return(NULL);
498
499 /* check null match in right place */
500 if (at == NULL)
501 return(NULL);
502 shlen = strlen(at);
503 if (shlen == 0)
504 shlen = 1; /* force check for end-of-string */
505 if (strncmp(p, at, shlen) != 0) {
506 sprintf(grump, "matched null at `%.20s'", p);
507 return(grump);
508 }
509 return(NULL);
510 }
511
512 /*
513 * eprint - convert error number to name
514 */
515 static char *
eprint(int err)516 eprint(int err)
517 {
518 static char epbuf[100];
519 size_t len;
520
521 len = regerror(REG_ITOA|err, NULL, epbuf, sizeof(epbuf));
522 assert(len <= sizeof(epbuf));
523 return(epbuf);
524 }
525
526 /*
527 * efind - convert error name to number
528 */
529 static int
efind(char * name)530 efind(char *name)
531 {
532 static char efbuf[100];
533 regex_t re;
534
535 sprintf(efbuf, "REG_%s", name);
536 assert(strlen(efbuf) < sizeof(efbuf));
537 #if REG_ATOI
538 re.re_endp = efbuf;
539 #endif
540 (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
541 return(atoi(efbuf));
542 }
543