1 %{
2 #include "common.h"
3 #include "smtp.h"
4 #include <ctype.h>
5
6 #define YYMAXDEPTH 500 /* was default 150 */
7
8 char *yylp; /* next character to be lex'd */
9 int yydone; /* tell yylex to give up */
10 char *yybuffer; /* first parsed character */
11 char *yyend; /* end of buffer to be parsed */
12 Node *root;
13 Field *firstfield;
14 Field *lastfield;
15 Node *usender;
16 Node *usys;
17 Node *udate;
18 char *startfield, *endfield;
19 int originator;
20 int destination;
21 int date;
22 int received;
23 int messageid;
24 %}
25
/*
 * Tokens returned by yylex.  WORD is any string that is not one of the
 * recognized field names; the rest name specific header fields.  The
 * declaration order is kept as it was so the token numbers do not change.
 */
%term WORD
%term DATE RESENT_DATE
%term RETURN_PATH FROM SENDER REPLY_TO
%term RESENT_FROM RESENT_SENDER RESENT_REPLY_TO
%term SUBJECT
%term TO CC BCC
%term RESENT_TO RESENT_CC RESENT_BCC
%term REMOTE
%term PRECEDENCE MIMEVERSION CONTENTTYPE MESSAGEID RECEIVED MAILER
%term BADTOKEN
%start msg
52 %%
53
/*
 * A message header: a list of fields, optionally preceded by a unix
 * style "From" line.
 */
msg		: fields
		| unixfrom '\n' fields
		;
/*
 * A bare newline ends the list; yydone then makes yylex return 0 on
 * every later call.
 */
fields		: '\n'
			{ yydone = 1; }
		| field '\n'
		| field '\n' fields
		;
/*
 * One header field.  The date, originator and destination flags record
 * that at least one field of that class was parsed.
 */
field		: dates
			{ date = 1; }
		| originator
			{ originator = 1; }
		| destination
			{ destination = 1; }
		| subject
		| optional
		| ignored
		| received
		| precedence
		/* after a syntax error, resume with the field following the next newline */
		| error '\n' field
		;
/*
 * Unix "From" line.  The sender, date and system nodes are kept in
 * usender, udate and usys; the keyword nodes are freed.
 */
unixfrom	: FROM route_addr unix_date_time REMOTE FROM word
			{ freenode($1); freenode($4); freenode($5);
			  usender = $2; udate = $3; usys = $6;
			}
		;
/*
 * Originator fields.  Each is linked into one node list (name, colon,
 * value) and recorded by newfield() with its source argument set to 1.
 */
originator	: REPLY_TO ':' address_list
			{ newfield(link3($1, $2, $3), 1); }
		| RETURN_PATH ':' route_addr
			{ newfield(link3($1, $2, $3), 1); }
		| FROM ':' mailbox_list
			{ newfield(link3($1, $2, $3), 1); }
		| SENDER ':' mailbox
			{ newfield(link3($1, $2, $3), 1); }
		| RESENT_REPLY_TO ':' address_list
			{ newfield(link3($1, $2, $3), 1); }
		| RESENT_SENDER ':' mailbox
			{ newfield(link3($1, $2, $3), 1); }
		| RESENT_FROM ':' mailbox
			{ newfield(link3($1, $2, $3), 1); }
		;
/*
 * Date fields; the value is accepted as any run of things.
 */
dates		: DATE ':' date_time
			{ newfield(link3($1, $2, $3), 0); }
		| RESENT_DATE ':' date_time
			{ newfield(link3($1, $2, $3), 0); }
		;
/*
 * Destination fields; each may have an empty value, in which case only
 * the name and the colon are linked.
 */
destination	: TO ':'
			{ newfield(link2($1, $2), 0); }
		| TO ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		| RESENT_TO ':'
			{ newfield(link2($1, $2), 0); }
		| RESENT_TO ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		| CC ':'
			{ newfield(link2($1, $2), 0); }
		| CC ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		| RESENT_CC ':'
			{ newfield(link2($1, $2), 0); }
		| RESENT_CC ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		| BCC ':'
			{ newfield(link2($1, $2), 0); }
		| BCC ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		| RESENT_BCC ':'
			{ newfield(link2($1, $2), 0); }
		| RESENT_BCC ':' address_list
			{ newfield(link3($1, $2, $3), 0); }
		;
subject		: SUBJECT ':' things
			{ newfield(link3($1, $2, $3), 0); }
		| SUBJECT ':'
			{ newfield(link2($1, $2), 0); }
		;
/*
 * Received fields are recorded like any other and also counted.
 */
received	: RECEIVED ':' things
			{ newfield(link3($1, $2, $3), 0); received++; }
		| RECEIVED ':'
			{ newfield(link2($1, $2), 0); received++; }
		;
precedence	: PRECEDENCE ':' things
			{ newfield(link3($1, $2, $3), 0); }
		| PRECEDENCE ':'
			{ newfield(link2($1, $2), 0); }
		;
/*
 * Recognized field names whose values are not parsed any further.
 */
ignored		: ignoredhdr ':' things
			{ newfield(link3($1, $2, $3), 0); }
		| ignoredhdr ':'
			{ newfield(link2($1, $2), 0); }
		;
/* seeing the Message-ID field name sets the messageid flag */
ignoredhdr	: MIMEVERSION | CONTENTTYPE | MESSAGEID { messageid = 1; } | MAILER
		;
/*
 * Any other field.  The name is lexed with the same rules as field
 * values, so it is checked afterwards: if badfieldname() rejects it,
 * the nodes are freed and the action returns 1 from the parser.
 */
optional	: fieldwords ':' things
			{ /* hack to allow same lex for field names and the rest */
			 if(badfieldname($1)){
				freenode($1);
				freenode($2);
				freenode($3);
				return 1;
			 }
			 newfield(link3($1, $2, $3), 0);
			}
		| fieldwords ':'
			{ /* hack to allow same lex for field names and the rest */
			 if(badfieldname($1)){
				freenode($1);
				freenode($2);
				return 1;
			 }
			 newfield(link2($1, $2), 0);
			}
		;
/*
 * Comma separated addresses; the comma nodes stay in the linked list.
 */
address_list	: address
		| address_list ',' address
			{ $$ = link3($1, $2, $3); }
		;
address		: mailbox
		| group
		;
/*
 * A named group: phrase ':' [address_list] ';'
 */
group		: phrase ':' address_list ';'
			{ $$ = link2($1, link3($2, $3, $4)); }
		| phrase ':' ';'
			{ $$ = link3($1, $2, $3); }
		;
mailbox_list	: mailbox
		| mailbox_list ',' mailbox
			{ $$ = link3($1, $2, $3); }
		;
/*
 * A single mailbox: a bare address, or an address in angle brackets
 * with an optional phrase in front.
 */
mailbox		: route_addr
		| phrase brak_addr
			{ $$ = link2($1, $2); }
		| brak_addr
		;
/*
 * An address in angle brackets.  For the empty form "<>" the '>' node
 * is turned into an address by nobody() and the '<' node is freed.
 */
brak_addr	: '<' route_addr '>'
			{ $$ = link3($1, $2, $3); }
		| '<' '>'
			{ $$ = nobody($2); freenode($1); }
		;
/*
 * An address with an optional source route.  The pieces are merged into
 * a single node by concat() and that node is marked as an address.
 */
route_addr	: route ':' at_addr
			{ $$ = address(concat($1, concat($2, $3))); }
		| addr_spec
		;
/* source route: @domain[,@domain...], merged into one node */
route		: '@' domain
			{ $$ = concat($1, $2); }
		| route ',' '@' domain
			{ $$ = concat($1, concat($2, concat($3, $4))); }
		;
addr_spec	: local_part
			{ $$ = address($1); }
		| at_addr
		;
/* local@domain; further @domain parts may follow */
at_addr		: local_part '@' domain
			{ $$ = address(concat($1, concat($2, $3)));}
		| at_addr '@' domain
			{ $$ = address(concat($1, concat($2, $3)));}
		;
local_part	: word
		;
domain		: word
		;
/* one or more words, kept as separate linked nodes */
phrase		: word
		| phrase word
			{ $$ = link2($1, $2); }
		;
/*
 * Unstructured field value: any run of words and special characters.
 */
things		: thing
		| things thing
			{ $$ = link2($1, $2); }
		;
thing		: word | '<' | '>' | '@' | ':' | ';' | ','
		;
date_time	: things
		;
/*
 * Date on a unix "From" line.  The nodes are relinked in the order
 * $1 $3 $2 $6 $4 $5, where $4 is the time and may itself be a list.
 */
unix_date_time	: word word word unix_time word word
			{ $$ = link3($1, $3, link3($2, $6, link2($4, $5))); }
		;
/* colon separated words, e.g. the parts of a time of day */
unix_time	: word
		| unix_time ':' word
			{ $$ = link3($1, $2, $3); }
		;
/*
 * Every field-name token is also accepted wherever a plain word is,
 * because the lexer returns the same tokens inside field values.
 */
word		: WORD | DATE | RESENT_DATE | RETURN_PATH | FROM | SENDER
		| REPLY_TO | RESENT_FROM | RESENT_SENDER | RESENT_REPLY_TO
		| TO | CC | BCC | RESENT_TO | RESENT_CC | RESENT_BCC | REMOTE | SUBJECT
		| PRECEDENCE | MIMEVERSION | CONTENTTYPE | MESSAGEID | RECEIVED | MAILER
		;
/*
 * Name of an unrecognized field.  It must start with a WORD or one of
 * the fieldword characters and may continue with any word or fieldword.
 */
fieldwords	: fieldword
		| WORD
		| fieldwords fieldword
			{ $$ = link2($1, $2); }
		| fieldwords word
			{ $$ = link2($1, $2); }
		;
/* the special characters other than ':' */
fieldword	: '<' | '>' | '@' | ';' | ','
		;
248 %%
249
250 /*
251 * Initialize the parsing. Done once for each header field.
252 */
253 void
254 yyinit(char *p, int len)
255 {
256 yybuffer = p;
257 yylp = p;
258 yyend = p + len;
259 firstfield = lastfield = 0;
260 received = 0;
261 }
262
/*
 * A header field name we care about and the token returned for it.
 */
typedef struct Keyword {
	char *rep;	/* field name as it appears in a header */
	int val;	/* token value */
} Keyword;
271
/*
 * Field names that we need to recognize, with the token yylex returns
 * for each.  yylex walks the table until it reaches an entry whose val
 * is WORD, so the last entry both ends the table and supplies the token
 * for every word that is not listed.  Names are compared with cistrcmp,
 * so case does not matter.
 */
Keyword key[] = {
	{ "date", DATE },
	{ "resent-date", RESENT_DATE },
	/*
	 * NOTE(review): spelled with an underscore, so a "Return-Path" field
	 * name lexes as WORD.  Changing the spelling would route such fields
	 * to the RETURN_PATH ':' route_addr rule, which takes no angle
	 * brackets -- confirm intent before touching this.
	 */
	{ "return_path", RETURN_PATH },
	{ "from", FROM },
	{ "sender", SENDER },
	{ "reply-to", REPLY_TO },
	{ "resent-from", RESENT_FROM },
	{ "resent-sender", RESENT_SENDER },
	{ "resent-reply-to", RESENT_REPLY_TO },
	{ "to", TO },
	{ "cc", CC },
	{ "bcc", BCC },
	{ "resent-to", RESENT_TO },
	{ "resent-cc", RESENT_CC },
	{ "resent-bcc", RESENT_BCC },
	{ "remote", REMOTE },
	{ "subject", SUBJECT },
	{ "precedence", PRECEDENCE },
	{ "mime-version", MIMEVERSION },
	{ "content-type", CONTENTTYPE },
	{ "message-id", MESSAGEID },
	{ "received", RECEIVED },
	{ "mailer", MAILER },
	/* end of table */
	{ "who-the-hell-cares", WORD }
};
299
300 /*
301 * Lexical analysis for an rfc822 header field. Continuation lines
302 * are handled in yywhite() when skipping over white space.
303 *
304 */
yylex(void)305 yylex(void)
306 {
307 String *t;
308 int quoting;
309 int escaping;
310 char *start;
311 Keyword *kp;
312 int c, d;
313
314 /* print("lexing\n"); /**/
315 if(yylp >= yyend)
316 return 0;
317 if(yydone)
318 return 0;
319
320 quoting = escaping = 0;
321 start = yylp;
322 yylval = malloc(sizeof(Node));
323 yylval->white = yylval->s = 0;
324 yylval->next = 0;
325 yylval->addr = 0;
326 yylval->start = yylp;
327 for(t = 0; yylp < yyend; yylp++){
328 c = *yylp & 0xff;
329
330 /* dump nulls, they can't be in header */
331 if(c == 0)
332 continue;
333
334 if(escaping) {
335 escaping = 0;
336 } else if(quoting) {
337 switch(c){
338 case '\\':
339 escaping = 1;
340 break;
341 case '\n':
342 d = (*(yylp+1))&0xff;
343 if(d != ' ' && d != '\t'){
344 quoting = 0;
345 yylp--;
346 continue;
347 }
348 break;
349 case '"':
350 quoting = 0;
351 break;
352 }
353 } else {
354 switch(c){
355 case '\\':
356 escaping = 1;
357 break;
358 case '(':
359 case ' ':
360 case '\t':
361 case '\r':
362 goto out;
363 case '\n':
364 if(yylp == start){
365 yylp++;
366 /* print("lex(c %c)\n", c); /**/
367 yylval->end = yylp;
368 return yylval->c = c;
369 }
370 goto out;
371 case '@':
372 case '>':
373 case '<':
374 case ':':
375 case ',':
376 case ';':
377 if(yylp == start){
378 yylp++;
379 yylval->white = yywhite();
380 /* print("lex(c %c)\n", c); /**/
381 yylval->end = yylp;
382 return yylval->c = c;
383 }
384 goto out;
385 case '"':
386 quoting = 1;
387 break;
388 default:
389 break;
390 }
391 }
392 if(t == 0)
393 t = s_new();
394 s_putc(t, c);
395 }
396 out:
397 yylval->white = yywhite();
398 if(t) {
399 s_terminate(t);
400 } else /* message begins with white-space! */
401 return yylval->c = '\n';
402 yylval->s = t;
403 for(kp = key; kp->val != WORD; kp++)
404 if(cistrcmp(s_to_c(t), kp->rep)==0)
405 break;
406 /* print("lex(%d) %s\n", kp->val-WORD, s_to_c(t)); /**/
407 yylval->end = yylp;
408 return yylval->c = kp->val;
409 }
410
/*
 * Called by the parser on a syntax error.  The message is deliberately
 * dropped; nothing is printed.
 */
void
yyerror(char *x)
{
	USED(x);
}
418
419 /*
420 * parse white space and comments
421 */
422 String *
yywhite(void)423 yywhite(void)
424 {
425 String *w;
426 int clevel;
427 int c;
428 int escaping;
429
430 escaping = clevel = 0;
431 for(w = 0; yylp < yyend; yylp++){
432 c = *yylp & 0xff;
433
434 /* dump nulls, they can't be in header */
435 if(c == 0)
436 continue;
437
438 if(escaping){
439 escaping = 0;
440 } else if(clevel) {
441 switch(c){
442 case '\n':
443 /*
444 * look for multiline fields
445 */
446 if(*(yylp+1)==' ' || *(yylp+1)=='\t')
447 break;
448 else
449 goto out;
450 case '\\':
451 escaping = 1;
452 break;
453 case '(':
454 clevel++;
455 break;
456 case ')':
457 clevel--;
458 break;
459 }
460 } else {
461 switch(c){
462 case '\\':
463 escaping = 1;
464 break;
465 case '(':
466 clevel++;
467 break;
468 case ' ':
469 case '\t':
470 case '\r':
471 break;
472 case '\n':
473 /*
474 * look for multiline fields
475 */
476 if(*(yylp+1)==' ' || *(yylp+1)=='\t')
477 break;
478 else
479 goto out;
480 default:
481 goto out;
482 }
483 }
484 if(w == 0)
485 w = s_new();
486 s_putc(w, c);
487 }
488 out:
489 if(w)
490 s_terminate(w);
491 return w;
492 }
493
494 /*
495 * link two parsed entries together
496 */
497 Node*
link2(Node * p1,Node * p2)498 link2(Node *p1, Node *p2)
499 {
500 Node *p;
501
502 for(p = p1; p->next; p = p->next)
503 ;
504 p->next = p2;
505 return p1;
506 }
507
508 /*
509 * link three parsed entries together
510 */
511 Node*
link3(Node * p1,Node * p2,Node * p3)512 link3(Node *p1, Node *p2, Node *p3)
513 {
514 Node *p;
515
516 for(p = p2; p->next; p = p->next)
517 ;
518 p->next = p3;
519
520 for(p = p1; p->next; p = p->next)
521 ;
522 p->next = p2;
523
524 return p1;
525 }
526
527 /*
528 * make a:b, move all white space after both
529 */
530 Node*
colon(Node * p1,Node * p2)531 colon(Node *p1, Node *p2)
532 {
533 if(p1->white){
534 if(p2->white)
535 s_append(p1->white, s_to_c(p2->white));
536 } else {
537 p1->white = p2->white;
538 p2->white = 0;
539 }
540
541 s_append(p1->s, ":");
542 if(p2->s)
543 s_append(p1->s, s_to_c(p2->s));
544
545 if(p1->end < p2->end)
546 p1->end = p2->end;
547 freenode(p2);
548 return p1;
549 }
550
551 /*
552 * concatenate two fields, move all white space after both
553 */
554 Node*
concat(Node * p1,Node * p2)555 concat(Node *p1, Node *p2)
556 {
557 char buf[2];
558
559 if(p1->white){
560 if(p2->white)
561 s_append(p1->white, s_to_c(p2->white));
562 } else {
563 p1->white = p2->white;
564 p2->white = 0;
565 }
566
567 if(p1->s == nil){
568 buf[0] = p1->c;
569 buf[1] = 0;
570 p1->s = s_new();
571 s_append(p1->s, buf);
572 }
573
574 if(p2->s)
575 s_append(p1->s, s_to_c(p2->s));
576 else {
577 buf[0] = p2->c;
578 buf[1] = 0;
579 s_append(p1->s, buf);
580 }
581
582 if(p1->end < p2->end)
583 p1->end = p2->end;
584 freenode(p2);
585 return p1;
586 }
587
588 /*
589 * look for disallowed chars in the field name
590 */
591 int
badfieldname(Node * p)592 badfieldname(Node *p)
593 {
594 for(; p; p = p->next){
595 /* field name can't contain white space */
596 if(p->white && p->next)
597 return 1;
598 }
599 return 0;
600 }
601
602 /*
603 * mark as an address
604 */
605 Node *
address(Node * p)606 address(Node *p)
607 {
608 p->addr = 1;
609 return p;
610 }
611
/*
 * case independent string compare
 *
 * Returns 0 if s1 and s2 are equal ignoring case and non-zero
 * otherwise (-1 at the first differing character, or the next
 * character of s2 when s1 is a proper prefix of it).  The result says
 * nothing about ordering.
 */
int
cistrcmp(char *s1, char *s2)
{
	int c1, c2;

	for(; *s1; s1++, s2++){
		/*
		 * the ctype functions are only defined for values that fit
		 * in an unsigned char, so never hand them a negative char
		 */
		c1 = tolower((unsigned char)*s1);
		c2 = tolower((unsigned char)*s2);
		if(c1 != c2)
			return -1;
	}
	return *s2;
}
628
629 /*
630 * free a node
631 */
632 void
freenode(Node * p)633 freenode(Node *p)
634 {
635 Node *tp;
636
637 while(p){
638 tp = p->next;
639 if(p->s)
640 s_free(p->s);
641 if(p->white)
642 s_free(p->white);
643 free(p);
644 p = tp;
645 }
646 }
647
648
649 /*
650 * an anonymous user
651 */
652 Node*
nobody(Node * p)653 nobody(Node *p)
654 {
655 if(p->s)
656 s_free(p->s);
657 p->s = s_copy("pOsTmAsTeR");
658 p->addr = 1;
659 return p;
660 }
661
662 /*
663 * add anything that was dropped because of a parse error
664 */
665 void
missing(Node * p)666 missing(Node *p)
667 {
668 Node *np;
669 char *start, *end;
670 Field *f;
671 String *s;
672
673 start = yybuffer;
674 if(lastfield != nil){
675 for(np = lastfield->node; np; np = np->next)
676 start = np->end+1;
677 }
678
679 end = p->start-1;
680
681 if(end <= start)
682 return;
683
684 if(strncmp(start, "From ", 5) == 0)
685 return;
686
687 np = malloc(sizeof(Node));
688 np->start = start;
689 np->end = end;
690 np->white = nil;
691 s = s_copy("BadHeader: ");
692 np->s = s_nappend(s, start, end-start);
693 np->next = nil;
694
695 f = malloc(sizeof(Field));
696 f->next = 0;
697 f->node = np;
698 f->source = 0;
699 if(firstfield)
700 lastfield->next = f;
701 else
702 firstfield = f;
703 lastfield = f;
704 }
705
706 /*
707 * create a new field
708 */
709 void
newfield(Node * p,int source)710 newfield(Node *p, int source)
711 {
712 Field *f;
713
714 missing(p);
715
716 f = malloc(sizeof(Field));
717 f->next = 0;
718 f->node = p;
719 f->source = source;
720 if(firstfield)
721 lastfield->next = f;
722 else
723 firstfield = f;
724 lastfield = f;
725 endfield = startfield;
726 startfield = yylp;
727 }
728
729 /*
730 * fee a list of fields
731 */
732 void
freefield(Field * f)733 freefield(Field *f)
734 {
735 Field *tf;
736
737 while(f){
738 tf = f->next;
739 freenode(f->node);
740 free(f);
741 f = tf;
742 }
743 }
744
745 /*
746 * add some white space to a node
747 */
748 Node*
whiten(Node * p)749 whiten(Node *p)
750 {
751 Node *tp;
752
753 for(tp = p; tp->next; tp = tp->next)
754 ;
755 if(tp->white == 0)
756 tp->white = s_copy(" ");
757 return p;
758 }
759
760 void
yycleanup(void)761 yycleanup(void)
762 {
763 Field *f, *fnext;
764 Node *np, *next;
765
766 for(f = firstfield; f; f = fnext){
767 for(np = f->node; np; np = next){
768 if(np->s)
769 s_free(np->s);
770 if(np->white)
771 s_free(np->white);
772 next = np->next;
773 free(np);
774 }
775 fnext = f->next;
776 free(f);
777 }
778 firstfield = lastfield = 0;
779 }
780