xref: /netbsd-src/games/quiz/rxp.c (revision 2980e352a13e8f0b545a366830c411e7a542ada8)
1 /*	$NetBSD: rxp.c,v 1.12 2004/01/27 20:30:30 jsm Exp $	*/
2 
3 /*-
4  * Copyright (c) 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
9  * Commodore Business Machines.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #ifndef lint
38 #if 0
39 static char sccsid[] = "@(#)rxp.c	8.1 (Berkeley) 5/31/93";
40 #else
41 __RCSID("$NetBSD: rxp.c,v 1.12 2004/01/27 20:30:30 jsm Exp $");
42 #endif
43 #endif /* not lint */
44 
45 /*
46  * regular expression parser
47  *
48  * external functions and return values are:
49  * rxp_compile(s)
50  *	TRUE	success
51  *	FALSE	parse failure; error message will be in char rxperr[]
52  * metas are:
53  *	{...}	optional pattern, equialent to [...|]
54  *	|	alternate pattern
55  *	[...]	pattern delimiters
56  *
57  * rxp_match(s)
58  *	TRUE	string s matches compiled pattern
59  *	FALSE	match failure or regexp error
60  *
61  * rxp_expand()
62  *	char *	reverse-engineered regular expression string
63  *	NULL	regexp error
64  */
65 
66 #include <stdio.h>
67 #include <stdlib.h>
68 #include <ctype.h>
69 #include "quiz.h"
70 					/* regexp tokens,	arg */
71 #define	LIT	(-1)			/* literal character,	char */
72 #define	SOT	(-2)			/* start text anchor,	- */
73 #define	EOT	(-3)			/* end text anchor,	- */
74 #define	GRP_S	(-4)			/* start alternate grp,	ptr_to_end */
75 #define	GRP_E	(-5)			/* end group,		- */
76 #define	ALT_S	(-6)			/* alternate starts,	ptr_to_next */
77 #define	ALT_E	(-7)			/* alternate ends,	- */
78 #define	END	(-8)			/* end of regexp,	- */
79 
80 typedef short Rxp_t;			/* type for regexp tokens */
81 
82 static Rxp_t rxpbuf[RXP_LINE_SZ];	/* compiled regular expression buffer */
83 char rxperr[128];			/* parser error message */
84 
85 static int	 rxp__compile(const char *, int);
86 static char	*rxp__expand(int);
87 static int	 rxp__match(const char *, int, Rxp_t *, Rxp_t *, const char *);
88 
89 int
90 rxp_compile(s)
91 	const char *	s;
92 {
93 	return (rxp__compile(s, TRUE));
94 }
95 
96 static int
97 rxp__compile(s, first)
98 	const char *s;
99 	int first;
100 {
101 	static Rxp_t *rp;
102 	static const char *sp;
103 	Rxp_t *grp_ptr;
104 	Rxp_t *alt_ptr;
105 	int esc, err;
106 
107 	esc = 0;
108 	if (first) {
109 		rp = rxpbuf;
110 		sp = s;
111 		*rp++ = SOT;	/* auto-anchor: pat is really ^pat$ */
112 		*rp++ = GRP_S;	/* auto-group: ^pat$ is really ^[pat]$ */
113 		*rp++ = 0;
114 	}
115 	*rp++ = ALT_S;
116 	alt_ptr = rp;
117 	*rp++ = 0;
118 	for (; *sp; ++sp) {
119 		if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
120 			(void)snprintf(rxperr, sizeof(rxperr),
121 			    "regular expression too long %s", s);
122 			return (FALSE);
123 		}
124 		if (*sp == ':' && !esc)
125 			break;
126 		if (esc) {
127 			*rp++ = LIT;
128 			*rp++ = *sp;
129 			esc = 0;
130 		}
131 		else switch (*sp) {
132 		case '\\':
133 			esc = 1;
134 			break;
135 		case '{':
136 		case '[':
137 			*rp++ = GRP_S;
138 			grp_ptr = rp;
139 			*rp++ = 0;
140 			sp++;
141 			if ((err = rxp__compile(s, FALSE)) != TRUE)
142 				return (err);
143 			*rp++ = GRP_E;
144 			*grp_ptr = rp - rxpbuf;
145 			break;
146 		case '}':
147 		case ']':
148 		case '|':
149 			*rp++ = ALT_E;
150 			*alt_ptr = rp - rxpbuf;
151 			if (*sp != ']') {
152 				*rp++ = ALT_S;
153 				alt_ptr = rp;
154 				*rp++ = 0;
155 			}
156 			if (*sp != '|') {
157 				if (*sp != ']') {
158 					*rp++ = ALT_E;
159 					*alt_ptr = rp - rxpbuf;
160 				}
161 				if (first) {
162 					(void)snprintf(rxperr, sizeof(rxperr),
163 					    "unmatched alternator in regexp %s",
164 					     s);
165 					return (FALSE);
166 				}
167 				return (TRUE);
168 			}
169 			break;
170 		default:
171 			*rp++ = LIT;
172 			*rp++ = *sp;
173 			esc = 0;
174 			break;
175 		}
176 	}
177 	if (!first) {
178 		(void)snprintf(rxperr, sizeof(rxperr),
179 		    "unmatched alternator in regexp %s", s);
180 		return (FALSE);
181 	}
182 	*rp++ = ALT_E;
183 	*alt_ptr = rp - rxpbuf;
184 	*rp++ = GRP_E;
185 	*(rxpbuf + 2) = rp - rxpbuf;
186 	*rp++ = EOT;
187 	*rp = END;
188 	return (TRUE);
189 }
190 
191 /*
192  * match string against compiled regular expression
193  */
194 int
195 rxp_match(s)
196 	const char *	s;
197 {
198 	return (rxp__match(s, TRUE, NULL, NULL, NULL));
199 }
200 
201 static int
202 rxp__match(s, first, j_succ, j_fail, sp_fail)
203 	const char *s;
204 	int first;
205 	Rxp_t *j_succ;		/* jump here on successful alt match */
206 	Rxp_t *j_fail;		/* jump here on failed match */
207 	const char *sp_fail;		/* reset sp to here on failed match */
208 {
209 	static Rxp_t *rp;
210 	static const char *sp;
211 	int ch;
212 	Rxp_t *grp_end = NULL;
213 
214 	if (first) {
215 		rp = rxpbuf;
216 		sp = s;
217 	}
218 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
219 		switch(*rp) {
220 		case LIT:
221 			rp++;
222 			ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
223 			if (ch != *sp++) {
224 				rp = j_fail;
225 				sp = sp_fail;
226 				return (FALSE);
227 			}
228 			rp++;
229 			break;
230 		case SOT:
231 			if (sp != s)
232 				return (FALSE);
233 			rp++;
234 			break;
235 		case EOT:
236 			if (*sp != 0)
237 				return (FALSE);
238 			rp++;
239 			break;
240 		case GRP_S:
241 			rp++;
242 			grp_end = rxpbuf + *rp++;
243 			break;
244 		case ALT_S:
245 			rp++;
246 			rxp__match(sp, FALSE, grp_end, rxpbuf + *rp++, sp);
247 			break;
248 		case ALT_E:
249 			rp = j_succ;
250 			return (TRUE);
251 		case GRP_E:
252 			rp = j_fail;
253 			sp = sp_fail;
254 			return (FALSE);
255 		default:
256 			abort();
257 		}
258 	return (*rp != END ? FALSE : TRUE);
259 }
260 
261 /*
262  * Reverse engineer the regular expression, by picking first of all alternates.
263  */
264 char *
265 rxp_expand()
266 {
267 	return (rxp__expand(TRUE));
268 }
269 
270 static char *
271 rxp__expand(first)
272 	int first;
273 {
274 	static char buf[RXP_LINE_SZ/2];
275 	static Rxp_t *rp;
276 	static char *bp;
277 	Rxp_t *grp_ptr;
278 	char *err;
279 
280 	if (first) {
281 		rp = rxpbuf;
282 		bp = buf;
283 	}
284 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
285 		switch(*rp) {
286 		case LIT:
287 			rp++;
288 			*bp++ = *rp++;
289 			break;
290 		case GRP_S:
291 			rp++;
292 			grp_ptr = rxpbuf + *rp;
293 			rp++;
294 			if ((err = rxp__expand(FALSE)) == NULL)
295 				return (err);
296 			rp = grp_ptr;
297 			break;
298 		case ALT_E:
299 			return (buf);
300 		case ALT_S:
301 			rp++;
302 			/* FALLTHROUGH */
303 		case SOT:
304 		case EOT:
305 		case GRP_E:
306 			rp++;
307 			break;
308 		default:
309 			return (NULL);
310 		}
311 	if (first) {
312 		if (*rp != END)
313 			return (NULL);
314 		*bp = '\0';
315 	}
316 	return (buf);
317 }
318