1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /* Copyright (c) 1988 AT&T */
22 /* All Rights Reserved */
23
24
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #ifndef _REGEXP_H
31 #define _REGEXP_H
32
33 #pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.9 */
34
35 #include <string.h>
36
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40
41 #define CBRA 2
42 #define CCHR 4
43 #define CDOT 8
44 #define CCL 12
45 #define CXCL 16
46 #define CDOL 20
47 #define CCEOF 22
48 #define CKET 24
49 #define CBACK 36
50 #define NCCL 40
51
52 #define STAR 01
53 #define RNGE 03
54
55 #define NBRA 9
56
57 #define PLACE(c) ep[c >> 3] |= bittab[c & 07]
58 #define ISTHERE(c) (ep[c >> 3] & bittab[c & 07])
59 #define ecmp(s1, s2, n) (strncmp(s1, s2, n) == 0)
60
61 static char *braslist[NBRA];
62 static char *braelist[NBRA];
63 int sed, nbra;
64 char *loc1, *loc2, *locs;
65 static int nodelim;
66
67 int circf;
68 static int low;
69 static int size;
70
71 static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
72
73 #ifdef __STDC__
74 int advance(const char *lp, const char *ep);
75 static void getrnge(const char *str);
76 #else
77 int advance();
78 static void getrnge();
79 #endif
80
81 char *
82 #ifdef __STDC__
compile(char * instring,char * ep,const char * endbuf,int seof)83 compile(char *instring, char *ep, const char *endbuf, int seof)
84 #else
85 compile(instring, ep, endbuf, seof)
86 register char *ep;
87 char *instring, *endbuf;
88 int seof;
89 #endif
90 {
91 INIT /* Dependent declarations and initializations */
92 register int c;
93 register int eof = seof;
94 char *lastep;
95 int cclcnt;
96 char bracket[NBRA], *bracketp;
97 int closed;
98 int neg;
99 int lc;
100 int i, cflg;
101 int iflag; /* used for non-ascii characters in brackets */
102
103 #ifdef __lint
104 /* make lint happy */
105 c = nodelim;
106 #endif
107
108 lastep = NULL;
109 if ((c = GETC()) == eof || c == '\n') {
110 if (c == '\n') {
111 UNGETC(c);
112 nodelim = 1;
113 }
114 if (*ep == 0 && !sed)
115 ERROR(41);
116 RETURN(ep);
117 }
118 bracketp = bracket;
119 circf = closed = nbra = 0;
120 if (c == '^')
121 circf++;
122 else
123 UNGETC(c);
124 for (;;) {
125 if (ep >= endbuf)
126 ERROR(50);
127 c = GETC();
128 if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
129 lastep = ep;
130 if (c == eof) {
131 *ep++ = CCEOF;
132 if (bracketp != bracket)
133 ERROR(42);
134 RETURN(ep);
135 }
136 switch (c) {
137
138 case '.':
139 *ep++ = CDOT;
140 continue;
141
142 case '\n':
143 if (!sed) {
144 UNGETC(c);
145 *ep++ = CCEOF;
146 nodelim = 1;
147 if (bracketp != bracket)
148 ERROR(42);
149 RETURN(ep);
150 } else ERROR(36);
151 case '*':
152 if (lastep == NULL || *lastep == CBRA ||
153 *lastep == CKET)
154 goto defchar;
155 *lastep |= STAR;
156 continue;
157
158 case '$':
159 if (PEEKC() != eof && PEEKC() != '\n')
160 goto defchar;
161 *ep++ = CDOL;
162 continue;
163
164 case '[':
165 if (&ep[17] >= endbuf)
166 ERROR(50);
167
168 *ep++ = CCL;
169 lc = 0;
170 for (i = 0; i < 16; i++)
171 ep[i] = 0;
172
173 neg = 0;
174 if ((c = GETC()) == '^') {
175 neg = 1;
176 c = GETC();
177 }
178 iflag = 1;
179 do {
180 c &= 0377;
181 if (c == '\0' || c == '\n')
182 ERROR(49);
183 if ((c & 0200) && iflag) {
184 iflag = 0;
185 if (&ep[32] >= endbuf)
186 ERROR(50);
187 ep[-1] = CXCL;
188 for (i = 16; i < 32; i++)
189 ep[i] = 0;
190 }
191 if (c == '-' && lc != 0) {
192 if ((c = GETC()) == ']') {
193 PLACE('-');
194 break;
195 }
196 if ((c & 0200) && iflag) {
197 iflag = 0;
198 if (&ep[32] >= endbuf)
199 ERROR(50);
200 ep[-1] = CXCL;
201 for (i = 16; i < 32; i++)
202 ep[i] = 0;
203 }
204 while (lc < c) {
205 PLACE(lc);
206 lc++;
207 }
208 }
209 lc = c;
210 PLACE(c);
211 } while ((c = GETC()) != ']');
212
213 if (iflag)
214 iflag = 16;
215 else
216 iflag = 32;
217
218 if (neg) {
219 if (iflag == 32) {
220 for (cclcnt = 0; cclcnt < iflag;
221 cclcnt++)
222 ep[cclcnt] ^= 0377;
223 ep[0] &= 0376;
224 } else {
225 ep[-1] = NCCL;
226 /* make nulls match so test fails */
227 ep[0] |= 01;
228 }
229 }
230
231 ep += iflag;
232
233 continue;
234
235 case '\\':
236 switch (c = GETC()) {
237
238 case '(':
239 if (nbra >= NBRA)
240 ERROR(43);
241 *bracketp++ = (char)nbra;
242 *ep++ = CBRA;
243 *ep++ = (char)nbra++;
244 continue;
245
246 case ')':
247 if (bracketp <= bracket)
248 ERROR(42);
249 *ep++ = CKET;
250 *ep++ = *--bracketp;
251 closed++;
252 continue;
253
254 case '{':
255 if (lastep == NULL)
256 goto defchar;
257 *lastep |= RNGE;
258 cflg = 0;
259 nlim:
260 c = GETC();
261 i = 0;
262 do {
263 if ('0' <= c && c <= '9')
264 i = 10 * i + c - '0';
265 else
266 ERROR(16);
267 } while (((c = GETC()) != '\\') && (c != ','));
268 if (i >= 255)
269 ERROR(11);
270 *ep++ = (char)i;
271 if (c == ',') {
272 if (cflg++)
273 ERROR(44);
274 if ((c = GETC()) == '\\')
275 *ep++ = (char)255;
276 else {
277 UNGETC(c);
278 goto nlim;
279 /* get 2'nd number */
280 }
281 }
282 if (GETC() != '}')
283 ERROR(45);
284 if (!cflg) /* one number */
285 *ep++ = (char)i;
286 else if ((ep[-1] & 0377) < (ep[-2] & 0377))
287 ERROR(46);
288 continue;
289
290 case '\n':
291 ERROR(36);
292
293 case 'n':
294 c = '\n';
295 goto defchar;
296
297 default:
298 if (c >= '1' && c <= '9') {
299 if ((c -= '1') >= closed)
300 ERROR(25);
301 *ep++ = CBACK;
302 *ep++ = (char)c;
303 continue;
304 }
305 }
306 /* Drop through to default to use \ to turn off special chars */
307
308 defchar:
309 default:
310 lastep = ep;
311 *ep++ = CCHR;
312 *ep++ = (char)c;
313 }
314 }
315 /*NOTREACHED*/
316 }
317
318 #ifdef __STDC__
319 int
step(const char * p1,const char * p2)320 step(const char *p1, const char *p2)
321 #else
322 int
323 step(p1, p2)
324 register char *p1, *p2;
325 #endif
326 {
327 char c;
328
329
330 if (circf) {
331 loc1 = (char *)p1;
332 return (advance(p1, p2));
333 }
334 /* fast check for first character */
335 if (*p2 == CCHR) {
336 c = p2[1];
337 do {
338 if (*p1 != c)
339 continue;
340 if (advance(p1, p2)) {
341 loc1 = (char *)p1;
342 return (1);
343 }
344 } while (*p1++);
345 return (0);
346 }
347 /* regular algorithm */
348 do {
349 if (advance(p1, p2)) {
350 loc1 = (char *)p1;
351 return (1);
352 }
353 } while (*p1++);
354 return (0);
355 }
356
357 int
358 #ifdef __STDC__
advance(const char * lp,const char * ep)359 advance(const char *lp, const char *ep)
360 #else
361 advance(lp, ep)
362 register char *lp, *ep;
363 #endif
364 {
365 #ifdef __STDC__
366 const char *curlp;
367 #else
368 register char *curlp;
369 #endif
370 int c;
371 char *bbeg;
372 register char neg;
373 size_t ct;
374
375 for (;;) {
376 neg = 0;
377 switch (*ep++) {
378
379 case CCHR:
380 if (*ep++ == *lp++)
381 continue;
382 return (0);
383 /*FALLTHRU*/
384
385 case CDOT:
386 if (*lp++)
387 continue;
388 return (0);
389 /*FALLTHRU*/
390
391 case CDOL:
392 if (*lp == 0)
393 continue;
394 return (0);
395 /*FALLTHRU*/
396
397 case CCEOF:
398 loc2 = (char *)lp;
399 return (1);
400 /*FALLTHRU*/
401
402 case CXCL:
403 c = (unsigned char)*lp++;
404 if (ISTHERE(c)) {
405 ep += 32;
406 continue;
407 }
408 return (0);
409 /*FALLTHRU*/
410
411 case NCCL:
412 neg = 1;
413 /*FALLTHRU*/
414
415 case CCL:
416 c = *lp++;
417 if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
418 ep += 16;
419 continue;
420 }
421 return (0);
422 /*FALLTHRU*/
423
424 case CBRA:
425 braslist[*ep++] = (char *)lp;
426 continue;
427 /*FALLTHRU*/
428
429 case CKET:
430 braelist[*ep++] = (char *)lp;
431 continue;
432 /*FALLTHRU*/
433
434 case CCHR | RNGE:
435 c = *ep++;
436 getrnge(ep);
437 while (low--)
438 if (*lp++ != c)
439 return (0);
440 curlp = lp;
441 while (size--)
442 if (*lp++ != c)
443 break;
444 if (size < 0)
445 lp++;
446 ep += 2;
447 goto star;
448 /*FALLTHRU*/
449
450 case CDOT | RNGE:
451 getrnge(ep);
452 while (low--)
453 if (*lp++ == '\0')
454 return (0);
455 curlp = lp;
456 while (size--)
457 if (*lp++ == '\0')
458 break;
459 if (size < 0)
460 lp++;
461 ep += 2;
462 goto star;
463 /*FALLTHRU*/
464
465 case CXCL | RNGE:
466 getrnge(ep + 32);
467 while (low--) {
468 c = (unsigned char)*lp++;
469 if (!ISTHERE(c))
470 return (0);
471 }
472 curlp = lp;
473 while (size--) {
474 c = (unsigned char)*lp++;
475 if (!ISTHERE(c))
476 break;
477 }
478 if (size < 0)
479 lp++;
480 ep += 34; /* 32 + 2 */
481 goto star;
482 /*FALLTHRU*/
483
484 case NCCL | RNGE:
485 neg = 1;
486 /*FALLTHRU*/
487
488 case CCL | RNGE:
489 getrnge(ep + 16);
490 while (low--) {
491 c = *lp++;
492 if (((c & 0200) || !ISTHERE(c)) ^ neg)
493 return (0);
494 }
495 curlp = lp;
496 while (size--) {
497 c = *lp++;
498 if (((c & 0200) || !ISTHERE(c)) ^ neg)
499 break;
500 }
501 if (size < 0)
502 lp++;
503 ep += 18; /* 16 + 2 */
504 goto star;
505 /*FALLTHRU*/
506
507 case CBACK:
508 bbeg = braslist[*ep];
509 ct = braelist[*ep++] - bbeg;
510
511 if (ecmp(bbeg, lp, ct)) {
512 lp += ct;
513 continue;
514 }
515 return (0);
516 /*FALLTHRU*/
517
518 case CBACK | STAR:
519 bbeg = braslist[*ep];
520 ct = braelist[*ep++] - bbeg;
521 curlp = lp;
522 while (ecmp(bbeg, lp, ct))
523 lp += ct;
524
525 while (lp >= curlp) {
526 if (advance(lp, ep))
527 return (1);
528 lp -= ct;
529 }
530 return (0);
531 /*FALLTHRU*/
532
533 case CDOT | STAR:
534 curlp = lp;
535 while (*lp++);
536 goto star;
537 /*FALLTHRU*/
538
539 case CCHR | STAR:
540 curlp = lp;
541 while (*lp++ == *ep);
542 ep++;
543 goto star;
544 /*FALLTHRU*/
545
546 case CXCL | STAR:
547 curlp = lp;
548 do {
549 c = (unsigned char)*lp++;
550 } while (ISTHERE(c));
551 ep += 32;
552 goto star;
553 /*FALLTHRU*/
554
555 case NCCL | STAR:
556 neg = 1;
557 /*FALLTHRU*/
558
559 case CCL | STAR:
560 curlp = lp;
561 do {
562 c = *lp++;
563 } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
564 ep += 16;
565 goto star;
566 /*FALLTHRU*/
567
568 star:
569 do {
570 if (--lp == locs)
571 break;
572 if (advance(lp, ep))
573 return (1);
574 } while (lp > curlp);
575 return (0);
576
577 }
578 }
579 /*NOTREACHED*/
580 }
581
582 static void
583 #ifdef __STDC__
getrnge(const char * str)584 getrnge(const char *str)
585 #else
586 getrnge(str)
587 register char *str;
588 #endif
589 {
590 low = *str++ & 0377;
591 size = ((*str & 0377) == 255)? 20000: (*str &0377) - low;
592 }
593
594 #ifdef __cplusplus
595 }
596 #endif
597
598 #endif /* _REGEXP_H */
599