#include <ctype.h>
#include <string.h>
2*2203Sroot
typedef int boolean;
#define TRUE    1
#define FALSE   0
#define NIL     0

boolean l_onecase;      /* true if upper and lower equivalent */

/*
 * makelower - yield the lower case form of c if c is an upper case
 * letter, otherwise c itself.
 *
 * The value handed to the <ctype.h> routines is first converted to
 * unsigned char: they are only defined for EOF and values that fit in
 * an unsigned char, and a plain char may be negative.
 * The argument is evaluated more than once, so it must be free of side
 * effects.
 */
#define makelower(c) \
    (isupper((unsigned char)(c)) ? tolower((unsigned char)(c)) : (c))
11*2203Sroot
12*2203Sroot /* STRNCMP - like strncmp except that we convert the
13*2203Sroot * first string to lower case before comparing
14*2203Sroot * if l_onecase is set.
15*2203Sroot */
16*2203Sroot
STRNCMP(s1,s2,len)17*2203Sroot STRNCMP(s1, s2, len)
18*2203Sroot register char *s1,*s2;
19*2203Sroot register int len;
20*2203Sroot {
21*2203Sroot if (l_onecase) {
22*2203Sroot do
23*2203Sroot if (*s2 - makelower(*s1))
24*2203Sroot return (*s2 - makelower(*s1));
25*2203Sroot else {
26*2203Sroot s2++;
27*2203Sroot s1++;
28*2203Sroot }
29*2203Sroot while (--len);
30*2203Sroot } else {
31*2203Sroot do
32*2203Sroot if (*s2 - *s1)
33*2203Sroot return (*s2 - *s1);
34*2203Sroot else {
35*2203Sroot s2++;
36*2203Sroot s1++;
37*2203Sroot }
38*2203Sroot while (--len);
39*2203Sroot }
40*2203Sroot return(0);
41*2203Sroot }
42*2203Sroot
/* The following routine converts an irregular expression to
 * internal format.
 *
 * Either meta symbols (\a, \d, \e or \p), character strings, or
 * operations (alternation or parenthesizing) can be
 * specified.  Each starts with a descriptor byte.  The descriptor
 * byte has STR set for strings, META set for meta symbols
 * and OPER set for operations.
 * The descriptor byte can also have the OPT bit set if the object
 * defined is optional.  Also ALT can be set to indicate an alternation.
 *
 * For meta symbols the byte following the descriptor byte identifies
 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '(').  For
 * strings the byte after the descriptor is a character count for
 * the string:
 *
 *      meta symbols := descriptor
 *                      symbol
 *
 *      strings :=      descriptor
 *                      character count
 *                      the string
 *
 *      operations :=   descriptor
 *                      symbol
 *                      character count
 */
70*2203Sroot
/*
 * handy macros for accessing parts of match blocks
 *
 * A is a pointer to the descriptor byte of a block.  Every use of the
 * argument is parenthesized so that any pointer-valued expression
 * (for example a conditional expression) can be passed safely.
 */
#define MSYM(A)   (*((A) + 1))          /* symbol in a meta symbol block */
#define MNEXT(A)  ((A) + 2)             /* character following a metasymbol block */

#define OSYM(A)   (*((A) + 1))          /* symbol in an operation block */
#define OCNT(A)   (*((A) + 2))          /* character count */
#define ONEXT(A)  ((A) + 3)             /* next character after the operation */
#define OPTR(A)   ((A) + *((A) + 2))    /* place pointed to by the operator */

#define SCNT(A)   (*((A) + 1))          /* byte count of a string */
#define SSTR(A)   ((A) + 2)             /* address of the string */
#define SNEXT(A)  ((A) + 2 + *((A) + 1)) /* character following the string */

/*
 * bit flags in the descriptor
 */
#define OPT   1         /* the object is optional */
#define STR   2         /* the block is a character string */
#define META  4         /* the block is a meta symbol */
#define ALT   8         /* the object is followed by an alternate */
#define OPER  16        /* the block is an operation */

char *ure;              /* pointer to current position in unconverted exp */
char *ccre;             /* pointer to current position in converted exp */
char *malloc();         /* NOTE(review): pre-ANSI declaration; it would
                         * conflict with <stdlib.h> if that header were
                         * ever included in this file */
98*2203Sroot
99*2203Sroot char *
convexp(re)100*2203Sroot convexp(re)
101*2203Sroot char *re; /* unconverted irregular expression */
102*2203Sroot {
103*2203Sroot register char *cre; /* pointer to converted regular expression */
104*2203Sroot
105*2203Sroot /* allocate room for the converted expression */
106*2203Sroot if (re == NIL)
107*2203Sroot return (NIL);
108*2203Sroot if (*re == '\0')
109*2203Sroot return (NIL);
110*2203Sroot cre = malloc (4 * strlen(re) + 3);
111*2203Sroot ccre = cre;
112*2203Sroot ure = re;
113*2203Sroot
114*2203Sroot /* start the conversion with a \a */
115*2203Sroot *cre = META | OPT;
116*2203Sroot MSYM(cre) = 'a';
117*2203Sroot ccre = MNEXT(cre);
118*2203Sroot
119*2203Sroot /* start the conversion (its recursive) */
120*2203Sroot expconv ();
121*2203Sroot *ccre = 0;
122*2203Sroot return (cre);
123*2203Sroot }
124*2203Sroot
expconv()125*2203Sroot expconv()
126*2203Sroot {
127*2203Sroot register char *cs; /* pointer to current symbol in converted exp */
128*2203Sroot register char c; /* character being processed */
129*2203Sroot register char *acs; /* pinter to last alternate */
130*2203Sroot register int temp;
131*2203Sroot
132*2203Sroot /* let the conversion begin */
133*2203Sroot acs = NIL;
134*2203Sroot while (*ure != NIL) {
135*2203Sroot switch (c = *ure++) {
136*2203Sroot
137*2203Sroot case '\\':
138*2203Sroot switch (c = *ure++) {
139*2203Sroot
140*2203Sroot /* escaped characters are just characters */
141*2203Sroot default:
142*2203Sroot if ((*cs & STR) == 0) {
143*2203Sroot cs = ccre;
144*2203Sroot *cs = STR;
145*2203Sroot SCNT(cs) = 1;
146*2203Sroot ccre += 2;
147*2203Sroot } else
148*2203Sroot SCNT(cs)++;
149*2203Sroot *ccre++ = c;
150*2203Sroot break;
151*2203Sroot
152*2203Sroot /* normal(?) metacharacters */
153*2203Sroot case 'a':
154*2203Sroot case 'd':
155*2203Sroot case 'e':
156*2203Sroot case 'p':
157*2203Sroot if (acs != NIL && acs != cs) {
158*2203Sroot do {
159*2203Sroot temp = OCNT(acs);
160*2203Sroot OCNT(acs) = ccre - acs;
161*2203Sroot acs -= temp;
162*2203Sroot } while (temp != 0);
163*2203Sroot acs = NIL;
164*2203Sroot }
165*2203Sroot cs = ccre;
166*2203Sroot *cs = META;
167*2203Sroot MSYM(cs) = c;
168*2203Sroot ccre = MNEXT(cs);
169*2203Sroot break;
170*2203Sroot }
171*2203Sroot break;
172*2203Sroot
173*2203Sroot /* just put the symbol in */
174*2203Sroot case '^':
175*2203Sroot case '$':
176*2203Sroot if (acs != NIL && acs != cs) {
177*2203Sroot do {
178*2203Sroot temp = OCNT(acs);
179*2203Sroot OCNT(acs) = ccre - acs;
180*2203Sroot acs -= temp;
181*2203Sroot } while (temp != 0);
182*2203Sroot acs = NIL;
183*2203Sroot }
184*2203Sroot cs = ccre;
185*2203Sroot *cs = META;
186*2203Sroot MSYM(cs) = c;
187*2203Sroot ccre = MNEXT(cs);
188*2203Sroot break;
189*2203Sroot
190*2203Sroot /* mark the last match sequence as optional */
191*2203Sroot case '?':
192*2203Sroot *cs = *cs | OPT;
193*2203Sroot break;
194*2203Sroot
195*2203Sroot /* recurse and define a subexpression */
196*2203Sroot case '(':
197*2203Sroot if (acs != NIL && acs != cs) {
198*2203Sroot do {
199*2203Sroot temp = OCNT(acs);
200*2203Sroot OCNT(acs) = ccre - acs;
201*2203Sroot acs -= temp;
202*2203Sroot } while (temp != 0);
203*2203Sroot acs = NIL;
204*2203Sroot }
205*2203Sroot cs = ccre;
206*2203Sroot *cs = OPER;
207*2203Sroot OSYM(cs) = '(';
208*2203Sroot ccre = ONEXT(cs);
209*2203Sroot expconv ();
210*2203Sroot OCNT(cs) = ccre - cs; /* offset to next symbol */
211*2203Sroot break;
212*2203Sroot
213*2203Sroot /* return from a recursion */
214*2203Sroot case ')':
215*2203Sroot if (acs != NIL) {
216*2203Sroot do {
217*2203Sroot temp = OCNT(acs);
218*2203Sroot OCNT(acs) = ccre - acs;
219*2203Sroot acs -= temp;
220*2203Sroot } while (temp != 0);
221*2203Sroot acs = NIL;
222*2203Sroot }
223*2203Sroot cs = ccre;
224*2203Sroot *cs = META;
225*2203Sroot MSYM(cs) = c;
226*2203Sroot ccre = MNEXT(cs);
227*2203Sroot return;
228*2203Sroot
229*2203Sroot /* mark the last match sequence as having an alternate */
230*2203Sroot /* the third byte will contain an offset to jump over the */
231*2203Sroot /* alternate match in case the first did not fail */
232*2203Sroot case '|':
233*2203Sroot if (acs != NIL && acs != cs)
234*2203Sroot OCNT(ccre) = ccre - acs; /* make a back pointer */
235*2203Sroot else
236*2203Sroot OCNT(ccre) = 0;
237*2203Sroot *cs |= ALT;
238*2203Sroot cs = ccre;
239*2203Sroot *cs = OPER;
240*2203Sroot OSYM(cs) = '|';
241*2203Sroot ccre = ONEXT(cs);
242*2203Sroot acs = cs; /* remember that the pointer is to be filles */
243*2203Sroot break;
244*2203Sroot
245*2203Sroot /* if its not a metasymbol just build a scharacter string */
246*2203Sroot default:
247*2203Sroot if ((*cs & STR) == 0) {
248*2203Sroot cs = ccre;
249*2203Sroot *cs = STR;
250*2203Sroot SCNT(cs) = 1;
251*2203Sroot ccre = SSTR(cs);
252*2203Sroot } else
253*2203Sroot SCNT(cs)++;
254*2203Sroot *ccre++ = c;
255*2203Sroot break;
256*2203Sroot }
257*2203Sroot }
258*2203Sroot if (acs != NIL) {
259*2203Sroot do {
260*2203Sroot temp = OCNT(acs);
261*2203Sroot OCNT(acs) = ccre - acs;
262*2203Sroot acs -= temp;
263*2203Sroot } while (temp != 0);
264*2203Sroot acs = NIL;
265*2203Sroot }
266*2203Sroot return;
267*2203Sroot }
268*2203Sroot /* end of convertre */
269*2203Sroot
270*2203Sroot
271*2203Sroot /*
272*2203Sroot * The following routine recognises an irregular expresion
273*2203Sroot * with the following special characters:
274*2203Sroot *
275*2203Sroot * \? - means last match was optional
276*2203Sroot * \a - matches any number of characters
277*2203Sroot * \d - matches any number of spaces and tabs
278*2203Sroot * \p - matches any number of alphanumeric
279*2203Sroot * characters. The
280*2203Sroot * characters matched will be copied into
281*2203Sroot * the area pointed to by 'name'.
282*2203Sroot * \| - alternation
283*2203Sroot * \( \) - grouping used mostly for alternation and
284*2203Sroot * optionality
285*2203Sroot *
286*2203Sroot * The irregular expression must be translated to internal form
287*2203Sroot * prior to calling this routine
288*2203Sroot *
289*2203Sroot * The value returned is the pointer to the first non \a
290*2203Sroot * character matched.
291*2203Sroot */
292*2203Sroot
293*2203Sroot boolean _escaped; /* true if we are currently _escaped */
294*2203Sroot char *_start; /* start of string */
295*2203Sroot
296*2203Sroot char *
expmatch(s,re,mstring)297*2203Sroot expmatch (s, re, mstring)
298*2203Sroot register char *s; /* string to check for a match in */
299*2203Sroot register char *re; /* a converted irregular expression */
300*2203Sroot register char *mstring; /* where to put whatever matches a \p */
301*2203Sroot {
302*2203Sroot register char *cs; /* the current symbol */
303*2203Sroot register char *ptr,*s1; /* temporary pointer */
304*2203Sroot boolean matched; /* a temporary boolean */
305*2203Sroot
306*2203Sroot /* initial conditions */
307*2203Sroot if (re == NIL)
308*2203Sroot return (NIL);
309*2203Sroot cs = re;
310*2203Sroot matched = FALSE;
311*2203Sroot
312*2203Sroot /* loop till expression string is exhausted (or at least pretty tired) */
313*2203Sroot while (*cs) {
314*2203Sroot switch (*cs & (OPER | STR | META)) {
315*2203Sroot
316*2203Sroot /* try to match a string */
317*2203Sroot case STR:
318*2203Sroot matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
319*2203Sroot if (matched) {
320*2203Sroot
321*2203Sroot /* hoorah it matches */
322*2203Sroot s += SCNT(cs);
323*2203Sroot cs = SNEXT(cs);
324*2203Sroot } else if (*cs & ALT) {
325*2203Sroot
326*2203Sroot /* alternation, skip to next expression */
327*2203Sroot cs = SNEXT(cs);
328*2203Sroot } else if (*cs & OPT) {
329*2203Sroot
330*2203Sroot /* the match is optional */
331*2203Sroot cs = SNEXT(cs);
332*2203Sroot matched = 1; /* indicate a successful match */
333*2203Sroot } else {
334*2203Sroot
335*2203Sroot /* no match, error return */
336*2203Sroot return (NIL);
337*2203Sroot }
338*2203Sroot break;
339*2203Sroot
340*2203Sroot /* an operator, do something fancy */
341*2203Sroot case OPER:
342*2203Sroot switch (OSYM(cs)) {
343*2203Sroot
344*2203Sroot /* this is an alternation */
345*2203Sroot case '|':
346*2203Sroot if (matched)
347*2203Sroot
348*2203Sroot /* last thing in the alternation was a match, skip ahead */
349*2203Sroot cs = OPTR(cs);
350*2203Sroot else
351*2203Sroot
352*2203Sroot /* no match, keep trying */
353*2203Sroot cs = ONEXT(cs);
354*2203Sroot break;
355*2203Sroot
356*2203Sroot /* this is a grouping, recurse */
357*2203Sroot case '(':
358*2203Sroot ptr = expmatch (s, ONEXT(cs), mstring);
359*2203Sroot if (ptr != NIL) {
360*2203Sroot
361*2203Sroot /* the subexpression matched */
362*2203Sroot matched = 1;
363*2203Sroot s = ptr;
364*2203Sroot } else if (*cs & ALT) {
365*2203Sroot
366*2203Sroot /* alternation, skip to next expression */
367*2203Sroot matched = 0;
368*2203Sroot } else if (*cs & OPT) {
369*2203Sroot
370*2203Sroot /* the match is optional */
371*2203Sroot matched = 1; /* indicate a successful match */
372*2203Sroot } else {
373*2203Sroot
374*2203Sroot /* no match, error return */
375*2203Sroot return (NIL);
376*2203Sroot }
377*2203Sroot cs = OPTR(cs);
378*2203Sroot break;
379*2203Sroot }
380*2203Sroot break;
381*2203Sroot
382*2203Sroot /* try to match a metasymbol */
383*2203Sroot case META:
384*2203Sroot switch (MSYM(cs)) {
385*2203Sroot
386*2203Sroot /* try to match anything and remember what was matched */
387*2203Sroot case 'p':
388*2203Sroot /*
389*2203Sroot * This is really the same as trying the match the
390*2203Sroot * remaining parts of the expression to any subset
391*2203Sroot * of the string.
392*2203Sroot */
393*2203Sroot s1 = s;
394*2203Sroot do {
395*2203Sroot ptr = expmatch (s1, MNEXT(cs), mstring);
396*2203Sroot if (ptr != NIL && s1 != s) {
397*2203Sroot
398*2203Sroot /* we have a match, remember the match */
399*2203Sroot strncpy (mstring, s, s1 - s);
400*2203Sroot mstring[s1 - s] = '\0';
401*2203Sroot return (ptr);
402*2203Sroot } else if (ptr != NIL && (*cs & OPT)) {
403*2203Sroot
404*2203Sroot /* it was aoptional so no match is ok */
405*2203Sroot return (ptr);
406*2203Sroot } else if (ptr != NIL) {
407*2203Sroot
408*2203Sroot /* not optional and we still matched */
409*2203Sroot return (NIL);
410*2203Sroot }
411*2203Sroot if (!isalnum(*s1) && *s1 != '_')
412*2203Sroot return (NIL);
413*2203Sroot if (*s1 == '\\')
414*2203Sroot _escaped = _escaped ? FALSE : TRUE;
415*2203Sroot else
416*2203Sroot _escaped = FALSE;
417*2203Sroot } while (*s1++);
418*2203Sroot return (NIL);
419*2203Sroot
420*2203Sroot /* try to match anything */
421*2203Sroot case 'a':
422*2203Sroot /*
423*2203Sroot * This is really the same as trying the match the
424*2203Sroot * remaining parts of the expression to any subset
425*2203Sroot * of the string.
426*2203Sroot */
427*2203Sroot s1 = s;
428*2203Sroot do {
429*2203Sroot ptr = expmatch (s1, MNEXT(cs), mstring);
430*2203Sroot if (ptr != NIL && s1 != s) {
431*2203Sroot
432*2203Sroot /* we have a match */
433*2203Sroot return (ptr);
434*2203Sroot } else if (ptr != NIL && (*cs & OPT)) {
435*2203Sroot
436*2203Sroot /* it was aoptional so no match is ok */
437*2203Sroot return (ptr);
438*2203Sroot } else if (ptr != NIL) {
439*2203Sroot
440*2203Sroot /* not optional and we still matched */
441*2203Sroot return (NIL);
442*2203Sroot }
443*2203Sroot if (*s1 == '\\')
444*2203Sroot _escaped = _escaped ? FALSE : TRUE;
445*2203Sroot else
446*2203Sroot _escaped = FALSE;
447*2203Sroot } while (*s1++);
448*2203Sroot return (NIL);
449*2203Sroot
450*2203Sroot /* fail if we are currently _escaped */
451*2203Sroot case 'e':
452*2203Sroot if (_escaped)
453*2203Sroot return(NIL);
454*2203Sroot cs = MNEXT(cs);
455*2203Sroot break;
456*2203Sroot
457*2203Sroot /* match any number of tabs and spaces */
458*2203Sroot case 'd':
459*2203Sroot ptr = s;
460*2203Sroot while (*s == ' ' || *s == '\t')
461*2203Sroot s++;
462*2203Sroot if (s != ptr || s == _start) {
463*2203Sroot
464*2203Sroot /* match, be happy */
465*2203Sroot matched = 1;
466*2203Sroot cs = MNEXT(cs);
467*2203Sroot } else if (*s == '\n' || *s == '\0') {
468*2203Sroot
469*2203Sroot /* match, be happy */
470*2203Sroot matched = 1;
471*2203Sroot cs = MNEXT(cs);
472*2203Sroot } else if (*cs & ALT) {
473*2203Sroot
474*2203Sroot /* try the next part */
475*2203Sroot matched = 0;
476*2203Sroot cs = MNEXT(cs);
477*2203Sroot } else if (*cs & OPT) {
478*2203Sroot
479*2203Sroot /* doesn't matter */
480*2203Sroot matched = 1;
481*2203Sroot cs = MNEXT(cs);
482*2203Sroot } else
483*2203Sroot
484*2203Sroot /* no match, error return */
485*2203Sroot return (NIL);
486*2203Sroot break;
487*2203Sroot
488*2203Sroot /* check for end of line */
489*2203Sroot case '$':
490*2203Sroot if (*s == '\0' || *s == '\n') {
491*2203Sroot
492*2203Sroot /* match, be happy */
493*2203Sroot s++;
494*2203Sroot matched = 1;
495*2203Sroot cs = MNEXT(cs);
496*2203Sroot } else if (*cs & ALT) {
497*2203Sroot
498*2203Sroot /* try the next part */
499*2203Sroot matched = 0;
500*2203Sroot cs = MNEXT(cs);
501*2203Sroot } else if (*cs & OPT) {
502*2203Sroot
503*2203Sroot /* doesn't matter */
504*2203Sroot matched = 1;
505*2203Sroot cs = MNEXT(cs);
506*2203Sroot } else
507*2203Sroot
508*2203Sroot /* no match, error return */
509*2203Sroot return (NIL);
510*2203Sroot break;
511*2203Sroot
512*2203Sroot /* check for start of line */
513*2203Sroot case '^':
514*2203Sroot if (s == _start) {
515*2203Sroot
516*2203Sroot /* match, be happy */
517*2203Sroot matched = 1;
518*2203Sroot cs = MNEXT(cs);
519*2203Sroot } else if (*cs & ALT) {
520*2203Sroot
521*2203Sroot /* try the next part */
522*2203Sroot matched = 0;
523*2203Sroot cs = MNEXT(cs);
524*2203Sroot } else if (*cs & OPT) {
525*2203Sroot
526*2203Sroot /* doesn't matter */
527*2203Sroot matched = 1;
528*2203Sroot cs = MNEXT(cs);
529*2203Sroot } else
530*2203Sroot
531*2203Sroot /* no match, error return */
532*2203Sroot return (NIL);
533*2203Sroot break;
534*2203Sroot
535*2203Sroot /* end of a subexpression, return success */
536*2203Sroot case ')':
537*2203Sroot return (s);
538*2203Sroot }
539*2203Sroot break;
540*2203Sroot }
541*2203Sroot }
542*2203Sroot return (s);
543*2203Sroot }
544