/*
 * Yacc declarations for the grep pattern parser.
 * Types and helpers (Re2, ral, re2cat, ...) come from grep.h.
 */
%{
#include	"grep.h"
%}

/* Semantic values carried on the parser stack. */
%union
{
	int	val;	/* character code of an LCHAR token */
	char	*str;	/* text of a bracket class (LCLASS) */
	Re2	re;	/* partially built expression; the actions use its .beg and .end */
}

/* Every expression nonterminal yields an Re2 fragment. */
%type	<re>	expr prog expr0 expr1 expr2 expr3 expr4

/*
 * Tokens produced by yylex().  The declaration order is kept as-is so
 * that the token numbers yacc assigns do not change.
 */
%token	<str>	LCLASS				/* [...] */
%token	<val>	LCHAR				/* ordinary or escaped character */
%token		LLPAREN LRPAREN LALT LSTAR LPLUS LQUES	/* ( ) | * + ? */
%token		LBEGIN LEND LDOT LBAD LNEWLINE	/* ^ $ . <bad input> <newline or end> */
%%
19
/*
 * prog: the complete pattern list.  An empty input is reported through
 * yyerror().  Otherwise the parsed expression is wrapped and stored in
 * the global topre.
 */
prog:	/* empty */
	{
		yyerror("empty pattern");
	}
|	expr newlines
	{
		/* start from a lone Tend node ... */
		$$.beg = ral(Tend);
		$$.end = $$.beg;
		/* ... put a starred class of every byte except '\n' in front of it ... */
		$$ = re2cat(re2star(re2or(re2char(0x00, '\n'-1), re2char('\n'+1, 0xff))), $$);
		/* ... then the user's expression ... */
		$$ = re2cat($1, $$);
		/* ... then a starred class of every byte 0x00-0xff in front of everything */
		$$ = re2cat(re2star(re2char(0x00, 0xff)), $$);
		topre = $$;
	}
	;

/* expr: one or more patterns separated by newlines, combined with re2or(). */
expr:
	expr0
|	expr newlines expr0
	{
		$$ = re2or($1, $3);
	}
	;

/*
 * expr0: a single pattern.  If it begins with '*', the mid-rule action
 * sets `literal' before the rest is scanned; yylex() then returns every
 * character up to the next newline or end of input as LCHAR.
 */
expr0:
	expr1
|	LSTAR { literal = 1; } expr1
	{
		/* the mid-rule action occupies $2, so the expression is $3 */
		$$ = $3;
	}
	;

/* expr1: alternation, a|b. */
expr1:
	expr2
|	expr1 LALT expr2
	{
		$$ = re2or($1, $3);
	}
	;

/* expr2: concatenation by juxtaposition. */
expr2:
	expr3
|	expr2 expr3
	{
		$$ = re2cat($1, $2);
	}
	;

/* expr3: postfix operators '*', '+' and '?'. */
expr3:
	expr4
|	expr3 LSTAR
	{
		$$ = re2star($1);
	}
|	expr3 LPLUS
	{
		/*
		 * New Talt node whose alt branch points back at the operand.
		 * The operand's end is patched to the new node, the fragment
		 * still begins at the operand and now ends at the Talt node.
		 */
		$$.beg = ral(Talt);
		patchnext($1.end, $$.beg);
		$$.beg->alt = $1.beg;
		$$.end = $$.beg;
		$$.beg = $1.beg;
	}
|	expr3 LQUES
	{
		/*
		 * New Talt node in front of the operand: its alt branch is
		 * the operand, and the node itself is appended to the
		 * operand's end list via appendnext().
		 */
		$$.beg = ral(Talt);
		$$.beg->alt = $1.beg;
		$$.end = $1.end;
		appendnext($$.end, $$.beg);
	}
	;

/* expr4: atoms. */
expr4:
	LCHAR
	{
		/* single character: a Tclass node with lo == hi */
		$$.beg = ral(Tclass);
		$$.end = $$.beg;
		$$.beg->lo = $1;
		$$.beg->hi = $1;
	}
|	LBEGIN
	{
		$$.beg = ral(Tbegin);
		$$.end = $$.beg;
	}
|	LEND
	{
		$$.beg = ral(Tend);
		$$.end = $$.beg;
	}
|	LDOT
	{
		/* '.' is handled as the class "^\n" */
		$$ = re2class("^\n");
	}
|	LCLASS
	{
		$$ = re2class($1);
	}
|	LLPAREN expr1 LRPAREN
	{
		$$ = $2;
	}
	;

/* newlines: one or more LNEWLINE tokens (yylex also returns one at end of input). */
newlines:
	LNEWLINE
|	newlines LNEWLINE
	;
%%
119
120 void
121 yyerror(char *e, ...)
122 {
123 va_list args;
124
125 fprint(2, "grep: ");
126 if(filename)
127 fprint(2, "%s:%ld: ", filename, lineno);
128 else if (pattern)
129 fprint(2, "%s: ", pattern);
130 va_start(args, e);
131 vfprint(2, e, args);
132 va_end(args);
133 fprint(2, "\n");
134 exits("syntax");
135 }
136
137 long
yylex(void)138 yylex(void)
139 {
140 char *q, *eq;
141 int c, s;
142
143 if(peekc) {
144 s = peekc;
145 peekc = 0;
146 return s;
147 }
148 c = getrec();
149 if(literal) {
150 if(c != 0 && c != '\n') {
151 yylval.val = c;
152 return LCHAR;
153 }
154 literal = 0;
155 }
156 switch(c) {
157 default:
158 yylval.val = c;
159 s = LCHAR;
160 break;
161 case '\\':
162 c = getrec();
163 yylval.val = c;
164 s = LCHAR;
165 if(c == '\n')
166 s = LNEWLINE;
167 break;
168 case '[':
169 goto getclass;
170 case '(':
171 s = LLPAREN;
172 break;
173 case ')':
174 s = LRPAREN;
175 break;
176 case '|':
177 s = LALT;
178 break;
179 case '*':
180 s = LSTAR;
181 break;
182 case '+':
183 s = LPLUS;
184 break;
185 case '?':
186 s = LQUES;
187 break;
188 case '^':
189 s = LBEGIN;
190 break;
191 case '$':
192 s = LEND;
193 break;
194 case '.':
195 s = LDOT;
196 break;
197 case 0:
198 peekc = -1;
199 case '\n':
200 s = LNEWLINE;
201 break;
202 }
203 return s;
204
205 getclass:
206 q = u.string;
207 eq = q + nelem(u.string) - 5;
208 c = getrec();
209 if(c == '^') {
210 q[0] = '^';
211 q[1] = '\n';
212 q[2] = '-';
213 q[3] = '\n';
214 q += 4;
215 c = getrec();
216 }
217 for(;;) {
218 if(q >= eq)
219 error("class too long");
220 if(c == ']' || c == 0)
221 break;
222 if(c == '\\') {
223 *q++ = c;
224 c = getrec();
225 if(c == 0)
226 break;
227 }
228 *q++ = c;
229 c = getrec();
230 }
231 *q = 0;
232 if(c == 0)
233 return LBAD;
234 yylval.str = u.string;
235 return LCLASS;
236 }
237