xref: /plan9/sys/src/cmd/upas/vf/vf.c (revision 238ca1fe340c2ddde96f1c246538fcd9a2198a9f)
1 /*
2  *  this is a filter that changes mime types and names of
3  *  suspect executable attachments.
4  */
5 #include "common.h"
6 #include <ctype.h>
7 
8 Biobuf in;
9 Biobuf out;
10 
11 typedef struct Mtype Mtype;
12 typedef struct Hdef Hdef;
13 typedef struct Hline Hline;
14 typedef struct Part Part;
15 
16 static int	badfile(char *name);
17 static int	badtype(char *type);
18 static void	ctype(Part*, Hdef*, char*);
19 static void	cencoding(Part*, Hdef*, char*);
20 static void	cdisposition(Part*, Hdef*, char*);
21 static int	decquoted(char *out, char *in, char *e);
22 static char*	getstring(char *p, String *s, int dolower);
23 static void	init_hdefs(void);
24 static int	isattribute(char **pp, char *attr);
25 static int	latin1toutf(char *out, char *in, char *e);
26 static String*	mkboundary(void);
27 static Part*	part(Part *pp);
28 static Part*	passbody(Part *p, int dobound);
29 static void	passnotheader(void);
30 static void	passunixheader(void);
31 static Part*	problemchild(Part *p);
32 static void	readheader(Part *p);
33 static Hline*	readhl(void);
34 static void	readmtypes(void);
35 static int	save(Part *p, char *file);
36 static void	setfilename(Part *p, char *name);
37 static char*	skiptosemi(char *p);
38 static char*	skipwhite(char *p);
39 static String*	tokenconvert(String *t);
40 static void	writeheader(Part *p, int);
41 
enum
{
	/* content transfer encodings (Part.encoding) */
	Enone	= 0,
	Ebase64	= 1,
	Equoted	= 2,

	/* content dispositions (Part.disposition) */
	Dnone	= 0,
	Dinline	= 1,
	Dfile	= 2,
	Dignore	= 3,

	/* base64 padding character */
	PAD64	= '=',
};
57 
58 /*
59  *  a message part; either the whole message or a subpart
60  */
61 struct Part
62 {
63 	Part	*pp;		/* parent part */
64 	Hline	*hl;		/* linked list of header lines */
65 	int	disposition;
66 	int	encoding;
67 	int	badfile;
68 	int	badtype;
69 	String	*boundary;	/* boundary for multiparts */
70 	int	blen;
71 	String	*charset;	/* character set */
72 	String	*type;		/* content type */
73 	String	*filename;	/* file name */
74 	Biobuf	*tmpbuf;		/* diversion input buffer */
75 };
76 
77 /*
78  *  a (multi)line header
79  */
80 struct Hline
81 {
82 	Hline	*next;
83 	String		*s;
84 };
85 
86 /*
87  *  header definitions for parsing
88  */
89 struct Hdef
90 {
91 	char *type;
92 	void (*f)(Part*, Hdef*, char*);
93 	int len;
94 };
95 
96 Hdef hdefs[] =
97 {
98 	{ "content-type:", ctype, },
99 	{ "content-transfer-encoding:", cencoding, },
100 	{ "content-disposition:", cdisposition, },
101 	{ 0, },
102 };
103 
104 /*
105  *  acceptable content types and their extensions
106  */
107 struct Mtype {
108 	Mtype	*next;
109 	char 	*ext;		/* extension */
110 	char	*gtype;		/* generic content type */
111 	char	*stype;		/* specific content type */
112 	char	class;
113 };
114 Mtype *mtypes;
115 
int justreject;		/* -r: never wrap attachments, see problemchild() */
char *savefile;		/* -s: file that receives a copy of rejected mail */
118 
/*
 *  print the command synopsis on standard error and exit
 */
void
usage(void)
{
	static char synopsis[] = "usage: upas/vf [-r] [-s savefile]\n";

	fprint(2, "%s", synopsis);
	exits("usage");
}
125 
126 void
main(int argc,char ** argv)127 main(int argc, char **argv)
128 {
129 	ARGBEGIN{
130 	case 'r':
131 		justreject = 1;
132 		break;
133 	case 's':
134 		savefile = EARGF(usage());
135 		break;
136 	default:
137 		usage();
138 	}ARGEND
139 
140 	if(argc)
141 		usage();
142 
143 	Binit(&in, 0, OREAD);
144 	Binit(&out, 1, OWRITE);
145 
146 	init_hdefs();
147 	readmtypes();
148 
149 	/* pass through our standard 'From ' line */
150 	passunixheader();
151 
152 	/* parse with the top level part */
153 	part(nil);
154 
155 	exits(0);
156 }
157 
158 void
refuse(char * reason)159 refuse(char *reason)
160 {
161 	char *full;
162 	static char msg[] =
163 		"mail refused: we don't accept executable attachments";
164 
165 	full = smprint("%s: %s", msg, reason);
166 	postnote(PNGROUP, getpid(), full);
167 	exits(full);
168 }
169 
170 
171 /*
172  *  parse a part; returns the ancestor whose boundary terminated
173  *  this part or nil on EOF.
174  */
175 static Part*
part(Part * pp)176 part(Part *pp)
177 {
178 	Part *p, *np;
179 
180 	p = mallocz(sizeof *p, 1);
181 	p->pp = pp;
182 	readheader(p);
183 
184 	if(p->boundary != nil){
185 		/* the format of a multipart part is always:
186 		 *   header
187 		 *   null or ignored body
188 		 *   boundary
189 		 *   header
190 		 *   body
191 		 *   boundary
192 		 *   ...
193 		 */
194 		writeheader(p, 1);
195 		np = passbody(p, 1);
196 		if(np != p)
197 			return np;
198 		for(;;){
199 			np = part(p);
200 			if(np != p)
201 				return np;
202 		}
203 	} else {
204 		/* no boundary */
205 		/* may still be multipart if this is a forwarded message */
206 		if(p->type && cistrcmp(s_to_c(p->type), "message/rfc822") == 0){
207 			/* the format of forwarded message is:
208 			 *   header
209 			 *   header
210 			 *   body
211 			 */
212 			writeheader(p, 1);
213 			passnotheader();
214 			return part(p);
215 		} else {
216 			/*
217 			 * This is the meat.  This may be an executable.
218 			 * if so, wrap it and change its type
219 			 */
220 			if(p->badtype || p->badfile){
221 				if(p->badfile == 2){
222 					if(savefile != nil)
223 						save(p, savefile);
224 					syslog(0, "vf", "vf rejected %s %s",
225 						p->type? s_to_c(p->type): "?",
226 						p->filename?s_to_c(p->filename):"?");
227 					fprint(2, "The mail contained an executable attachment.\n");
228 					fprint(2, "We refuse all mail containing such.\n");
229 					refuse(nil);
230 				}
231 				np = problemchild(p);
232 				if(np != p)
233 					return np;
234 				/* if problemchild returns p, it turns out p is okay: fall thru */
235 			}
236 			writeheader(p, 1);
237 			return passbody(p, 1);
238 		}
239 	}
240 }
241 
242 /*
243  *  read and parse a complete header
244  */
245 static void
readheader(Part * p)246 readheader(Part *p)
247 {
248 	Hline *hl, **l;
249 	Hdef *hd;
250 
251 	l = &p->hl;
252 	for(;;){
253 		hl = readhl();
254 		if(hl == nil)
255 			break;
256 		*l = hl;
257 		l = &hl->next;
258 
259 		for(hd = hdefs; hd->type != nil; hd++){
260 			if(cistrncmp(s_to_c(hl->s), hd->type, hd->len) == 0){
261 				(*hd->f)(p, hd, s_to_c(hl->s));
262 				break;
263 			}
264 		}
265 	}
266 }
267 
268 /*
269  *  read a possibly multiline header line
270  */
271 static Hline*
readhl(void)272 readhl(void)
273 {
274 	Hline *hl;
275 	String *s;
276 	char *p;
277 	int n;
278 
279 	p = Brdline(&in, '\n');
280 	if(p == nil)
281 		return nil;
282 	n = Blinelen(&in);
283 	if(memchr(p, ':', n) == nil){
284 		Bseek(&in, -n, 1);
285 		return nil;
286 	}
287 	s = s_nappend(s_new(), p, n);
288 	for(;;){
289 		p = Brdline(&in, '\n');
290 		if(p == nil)
291 			break;
292 		n = Blinelen(&in);
293 		if(*p != ' ' && *p != '\t'){
294 			Bseek(&in, -n, 1);
295 			break;
296 		}
297 		s = s_nappend(s, p, n);
298 	}
299 	hl = malloc(sizeof *hl);
300 	hl->s = s;
301 	hl->next = nil;
302 	return hl;
303 }
304 
305 /*
306  *  write out a complete header
307  */
308 static void
writeheader(Part * p,int xfree)309 writeheader(Part *p, int xfree)
310 {
311 	Hline *hl, *next;
312 
313 	for(hl = p->hl; hl != nil; hl = next){
314 		Bprint(&out, "%s", s_to_c(hl->s));
315 		if(xfree)
316 			s_free(hl->s);
317 		next = hl->next;
318 		if(xfree)
319 			free(hl);
320 	}
321 	if(xfree)
322 		p->hl = nil;
323 }
324 
325 /*
326  *  pass a body through.  return if we hit one of our ancestors'
327  *  boundaries or EOF.  if we hit a boundary, return a pointer to
328  *  that ancestor.  if we hit EOF, return nil.
329  */
330 static Part*
passbody(Part * p,int dobound)331 passbody(Part *p, int dobound)
332 {
333 	Part *pp;
334 	Biobuf *b;
335 	char *cp;
336 
337 	for(;;){
338 		if(p->tmpbuf){
339 			b = p->tmpbuf;
340 			cp = Brdline(b, '\n');
341 			if(cp == nil){
342 				Bterm(b);
343 				p->tmpbuf = nil;
344 				goto Stdin;
345 			}
346 		}else{
347 		Stdin:
348 			b = &in;
349 			cp = Brdline(b, '\n');
350 		}
351 		if(cp == nil)
352 			return nil;
353 		for(pp = p; pp != nil; pp = pp->pp)
354 			if(pp->boundary != nil
355 			&& strncmp(cp, s_to_c(pp->boundary), pp->blen) == 0){
356 				if(dobound)
357 					Bwrite(&out, cp, Blinelen(b));
358 				else
359 					Bseek(b, -Blinelen(b), 1);
360 				return pp;
361 			}
362 		Bwrite(&out, cp, Blinelen(b));
363 	}
364 }
365 
366 /*
367  *  save the message somewhere
368  */
369 static vlong bodyoff;	/* clumsy hack */
370 
371 static int
save(Part * p,char * file)372 save(Part *p, char *file)
373 {
374 	int fd;
375 	char *cp;
376 
377 	Bterm(&out);
378 	memset(&out, 0, sizeof(out));
379 
380 	fd = open(file, OWRITE);
381 	if(fd < 0)
382 		return -1;
383 	seek(fd, 0, 2);
384 	Binit(&out, fd, OWRITE);
385 	cp = ctime(time(0));
386 	cp[28] = 0;
387 	Bprint(&out, "From virusfilter %s\n", cp);
388 	writeheader(p, 0);
389 	bodyoff = Boffset(&out);
390 	passbody(p, 1);
391 	Bprint(&out, "\n");
392 	Bterm(&out);
393 	close(fd);
394 
395 	memset(&out, 0, sizeof out);
396 	Binit(&out, 1, OWRITE);
397 	return 0;
398 }
399 
400 /*
401  * write to a file but save the fd for passbody.
402  */
403 static char*
savetmp(Part * p)404 savetmp(Part *p)
405 {
406 	char *name;
407 	int fd;
408 
409 	name = mktemp(smprint("%s/vf.XXXXXXXXXXX", UPASTMP));
410 	if((fd = create(name, OWRITE|OEXCL, 0666)) < 0){
411 		fprint(2, "%s: error creating temporary file: %r\n", argv0);
412 		refuse("can't create temporary file");
413 	}
414 	close(fd);
415 	if(save(p, name) < 0){
416 		fprint(2, "%s: error saving temporary file: %r\n", argv0);
417 		refuse("can't write temporary file");
418 	}
419 	if(p->tmpbuf){
420 		fprint(2, "%s: error in savetmp: already have tmp file!\n",
421 			argv0);
422 		refuse("already have temporary file");
423 	}
424 	p->tmpbuf = Bopen(name, OREAD|ORCLOSE);
425 	if(p->tmpbuf == nil){
426 		fprint(2, "%s: error reading temporary file: %r\n", argv0);
427 		refuse("error reading temporary file");
428 	}
429 	Bseek(p->tmpbuf, bodyoff, 0);
430 	return name;
431 }
432 
433 /*
434  * Run the external checker to do content-based checks.
435  */
436 static int
runchecker(Part * p)437 runchecker(Part *p)
438 {
439 	int pid;
440 	char *name;
441 	Waitmsg *w;
442 
443 	if(access("/mail/lib/validateattachment", AEXEC) < 0)
444 		return 0;
445 
446 	name = savetmp(p);
447 	fprint(2, "run checker %s\n", name);
448 	switch(pid = fork()){
449 	case -1:
450 		sysfatal("fork: %r");
451 	case 0:
452 		dup(2, 1);
453 		execl("/mail/lib/validateattachment", "validateattachment",
454 			name, nil);
455 		_exits("exec failed");
456 	}
457 
458 	/*
459 	 * Okay to return on error - will let mail through but wrapped.
460 	 */
461 	w = wait();
462 	if(w == nil){
463 		syslog(0, "mail", "vf wait failed: %r");
464 		return 0;
465 	}
466 	if(w->pid != pid){
467 		syslog(0, "mail", "vf wrong pid %d != %d", w->pid, pid);
468 		return 0;
469 	}
470 	if(p->filename) {
471 		free(name);
472 		name = strdup(s_to_c(p->filename));
473 	}
474 	if(strstr(w->msg, "discard")){
475 		syslog(0, "mail", "vf validateattachment rejected %s", name);
476 		refuse("rejected by validateattachment");
477 	}
478 	if(strstr(w->msg, "accept")){
479 		syslog(0, "mail", "vf validateattachment accepted %s", name);
480 		return 1;
481 	}
482 	free(w);
483 	free(name);
484 	return 0;
485 }
486 
487 /*
488  *  emit a multipart Part that explains the problem
489  */
490 static Part*
problemchild(Part * p)491 problemchild(Part *p)
492 {
493 	Part *np;
494 	Hline *hl;
495 	String *boundary;
496 	char *cp;
497 
498 	/*
499 	 * We don't know whether the attachment is okay.
500 	 * If there's an external checker, let it have a crack at it.
501 	 */
502 	if(runchecker(p) > 0)
503 		return p;
504 
505 	if(justreject)
506 		return p;
507 
508 fprint(2, "x\n");
509 	syslog(0, "mail", "vf wrapped %s %s", p->type?s_to_c(p->type):"?",
510 		p->filename?s_to_c(p->filename):"?");
511 fprint(2, "x\n");
512 
513 	boundary = mkboundary();
514 fprint(2, "x\n");
515 	/* print out non-mime headers */
516 	for(hl = p->hl; hl != nil; hl = hl->next)
517 		if(cistrncmp(s_to_c(hl->s), "content-", 8) != 0)
518 			Bprint(&out, "%s", s_to_c(hl->s));
519 
520 fprint(2, "x\n");
521 	/* add in our own multipart headers and message */
522 	Bprint(&out, "Content-Type: multipart/mixed;\n");
523 	Bprint(&out, "\tboundary=\"%s\"\n", s_to_c(boundary));
524 	Bprint(&out, "Content-Disposition: inline\n");
525 	Bprint(&out, "\n");
526 	Bprint(&out, "This is a multi-part message in MIME format.\n");
527 	Bprint(&out, "--%s\n", s_to_c(boundary));
528 	Bprint(&out, "Content-Disposition: inline\n");
529 	Bprint(&out, "Content-Type: text/plain; charset=\"US-ASCII\"\n");
530 	Bprint(&out, "Content-Transfer-Encoding: 7bit\n");
531 	Bprint(&out, "\n");
532 	Bprint(&out, "from postmaster@%s:\n", sysname());
533 	Bprint(&out, "The following attachment had content that we can't\n");
534 	Bprint(&out, "prove to be harmless.  To avoid possible automatic\n");
535 	Bprint(&out, "execution, we changed the content headers.\n");
536 	Bprint(&out, "The original header was:\n\n");
537 
538 	/* print out original header lines */
539 	for(hl = p->hl; hl != nil; hl = hl->next)
540 		if(cistrncmp(s_to_c(hl->s), "content-", 8) == 0)
541 			Bprint(&out, "\t%s", s_to_c(hl->s));
542 	Bprint(&out, "--%s\n", s_to_c(boundary));
543 
544 	/* change file name */
545 	if(p->filename)
546 		s_append(p->filename, ".suspect");
547 	else
548 		p->filename = s_copy("file.suspect");
549 
550 	/* print out new header */
551 	Bprint(&out, "Content-Type: application/octet-stream\n");
552 	Bprint(&out, "Content-Disposition: attachment; filename=\"%s\"\n", s_to_c(p->filename));
553 	switch(p->encoding){
554 	case Enone:
555 		break;
556 	case Ebase64:
557 		Bprint(&out, "Content-Transfer-Encoding: base64\n");
558 		break;
559 	case Equoted:
560 		Bprint(&out, "Content-Transfer-Encoding: quoted-printable\n");
561 		break;
562 	}
563 
564 fprint(2, "z\n");
565 	/* pass the body */
566 	np = passbody(p, 0);
567 
568 fprint(2, "w\n");
569 	/* add the new boundary and the original terminator */
570 	Bprint(&out, "--%s--\n", s_to_c(boundary));
571 	if(np && np->boundary){
572 		cp = Brdline(&in, '\n');
573 		Bwrite(&out, cp, Blinelen(&in));
574 	}
575 
576 fprint(2, "a %p\n", np);
577 	return np;
578 }
579 
/*
 *  test whether *pp starts with `attr', optional blanks and '='
 *  (case-insensitive).  on a match *pp is moved past the '=' and
 *  any blanks after it and 1 is returned; otherwise *pp is left
 *  alone and 0 is returned.
 */
static int
isattribute(char **pp, char *attr)
{
	char *cur;
	int alen;

	alen = strlen(attr);
	cur = *pp;
	if(cistrncmp(cur, attr, alen) != 0)
		return 0;

	for(cur += alen; *cur == ' '; cur++)
		;
	if(*cur != '=')
		return 0;
	for(cur++; *cur == ' '; cur++)
		;

	*pp = cur;
	return 1;
}
600 
601 /*
602  *  parse content type header
603  */
604 static void
ctype(Part * p,Hdef * h,char * cp)605 ctype(Part *p, Hdef *h, char *cp)
606 {
607 	String *s;
608 
609 	cp += h->len;
610 	cp = skipwhite(cp);
611 
612 	p->type = s_new();
613 	cp = getstring(cp, p->type, 1);
614 	if(badtype(s_to_c(p->type)))
615 		p->badtype = 1;
616 
617 	while(*cp){
618 		if(isattribute(&cp, "boundary")){
619 			s = s_new();
620 			cp = getstring(cp, s, 0);
621 			p->boundary = s_reset(p->boundary);
622 			s_append(p->boundary, "--");
623 			s_append(p->boundary, s_to_c(s));
624 			p->blen = s_len(p->boundary);
625 			s_free(s);
626 		} else if(cistrncmp(cp, "multipart", 9) == 0){
627 			/*
628 			 *  the first unbounded part of a multipart message,
629 			 *  the preamble, is not displayed or saved
630 			 */
631 		} else if(isattribute(&cp, "name")){
632 			setfilename(p, cp);
633 		} else if(isattribute(&cp, "charset")){
634 			if(p->charset == nil)
635 				p->charset = s_new();
636 			cp = getstring(cp, s_reset(p->charset), 0);
637 		}
638 
639 		cp = skiptosemi(cp);
640 	}
641 }
642 
643 /*
644  *  parse content encoding header
645  */
646 static void
cencoding(Part * m,Hdef * h,char * p)647 cencoding(Part *m, Hdef *h, char *p)
648 {
649 	p += h->len;
650 	p = skipwhite(p);
651 	if(cistrncmp(p, "base64", 6) == 0)
652 		m->encoding = Ebase64;
653 	else if(cistrncmp(p, "quoted-printable", 16) == 0)
654 		m->encoding = Equoted;
655 }
656 
657 /*
658  *  parse content disposition header
659  */
660 static void
cdisposition(Part * p,Hdef * h,char * cp)661 cdisposition(Part *p, Hdef *h, char *cp)
662 {
663 	cp += h->len;
664 	cp = skipwhite(cp);
665 	while(*cp){
666 		if(cistrncmp(cp, "inline", 6) == 0){
667 			p->disposition = Dinline;
668 		} else if(cistrncmp(cp, "attachment", 10) == 0){
669 			p->disposition = Dfile;
670 		} else if(cistrncmp(cp, "filename=", 9) == 0){
671 			cp += 9;
672 			setfilename(p, cp);
673 		}
674 		cp = skiptosemi(cp);
675 	}
676 
677 }
678 
679 static void
setfilename(Part * p,char * name)680 setfilename(Part *p, char *name)
681 {
682 	if(p->filename == nil)
683 		p->filename = s_new();
684 	getstring(name, s_reset(p->filename), 0);
685 	p->filename = tokenconvert(p->filename);
686 	p->badfile = badfile(s_to_c(p->filename));
687 }
688 
/*
 *  return a pointer to the first non-white-space character of p
 *  (possibly its terminating NUL)
 */
static char*
skipwhite(char *p)
{
	/* the cast keeps bytes above 0x7f in the range isspace accepts */
	while(isspace((unsigned char)*p))
		p++;
	return p;
}
696 
/*
 *  advance to the start of the next ';'-separated attribute:
 *  skip up to the next ';', then over any run of ';' and white
 *  space.  returns a pointer to the NUL if there is none.
 */
static char*
skiptosemi(char *p)
{
	while(*p && *p != ';')
		p++;
	/* the cast keeps bytes above 0x7f in the range isspace accepts */
	while(*p == ';' || isspace((unsigned char)*p))
		p++;
	return p;
}
706 
707 /*
708  *  parse a possibly "'d string from a header.  A
709  *  ';' terminates the string.
710  */
711 static char*
getstring(char * p,String * s,int dolower)712 getstring(char *p, String *s, int dolower)
713 {
714 	s = s_reset(s);
715 	p = skipwhite(p);
716 	if(*p == '"'){
717 		p++;
718 		for(;*p && *p != '"'; p++)
719 			if(dolower)
720 				s_putc(s, tolower(*p));
721 			else
722 				s_putc(s, *p);
723 		if(*p == '"')
724 			p++;
725 		s_terminate(s);
726 
727 		return p;
728 	}
729 
730 	for(; *p && !isspace(*p) && *p != ';'; p++)
731 		if(dolower)
732 			s_putc(s, tolower(*p));
733 		else
734 			s_putc(s, *p);
735 	s_terminate(s);
736 
737 	return p;
738 }
739 
740 static void
init_hdefs(void)741 init_hdefs(void)
742 {
743 	Hdef *hd;
744 	static int already;
745 
746 	if(already)
747 		return;
748 	already = 1;
749 
750 	for(hd = hdefs; hd->type != nil; hd++)
751 		hd->len = strlen(hd->type);
752 }
753 
754 /*
755  *  create a new boundary
756  */
757 static String*
mkboundary(void)758 mkboundary(void)
759 {
760 	char buf[32];
761 	int i;
762 	static int already;
763 
764 	if(already == 0){
765 		srand((time(0)<<16)|getpid());
766 		already = 1;
767 	}
768 	strcpy(buf, "upas-");
769 	for(i = 5; i < sizeof(buf)-1; i++)
770 		buf[i] = 'a' + nrand(26);
771 	buf[i] = 0;
772 	return s_copy(buf);
773 }
774 
775 /*
776  *  skip blank lines till header
777  */
778 static void
passnotheader(void)779 passnotheader(void)
780 {
781 	char *cp;
782 	int i, n;
783 
784 	while((cp = Brdline(&in, '\n')) != nil){
785 		n = Blinelen(&in);
786 		for(i = 0; i < n-1; i++)
787 			if(cp[i] != ' ' && cp[i] != '\t' && cp[i] != '\r'){
788 				Bseek(&in, -n, 1);
789 				return;
790 			}
791 		Bwrite(&out, cp, n);
792 	}
793 }
794 
795 /*
796  *  pass unix header lines
797  */
798 static void
passunixheader(void)799 passunixheader(void)
800 {
801 	char *p;
802 	int n;
803 
804 	while((p = Brdline(&in, '\n')) != nil){
805 		n = Blinelen(&in);
806 		if(strncmp(p, "From ", 5) != 0){
807 			Bseek(&in, -n, 1);
808 			break;
809 		}
810 		Bwrite(&out, p, n);
811 	}
812 }
813 
814 /*
815  *  Read mime types
816  */
817 static void
readmtypes(void)818 readmtypes(void)
819 {
820 	Biobuf *b;
821 	char *p;
822 	char *f[6];
823 	Mtype *m;
824 	Mtype **l;
825 
826 	b = Bopen("/sys/lib/mimetype", OREAD);
827 	if(b == nil)
828 		return;
829 
830 	l = &mtypes;
831 	while((p = Brdline(b, '\n')) != nil){
832 		if(*p == '#')
833 			continue;
834 		p[Blinelen(b)-1] = 0;
835 		if(tokenize(p, f, nelem(f)) < 5)
836 			continue;
837 		m = mallocz(sizeof *m, 1);
838 		if(m == nil)
839 			goto err;
840 		m->ext = strdup(f[0]);
841 		if(m->ext == 0)
842 			goto err;
843 		m->gtype = strdup(f[1]);
844 		if(m->gtype == 0)
845 			goto err;
846 		m->stype = strdup(f[2]);
847 		if(m->stype == 0)
848 			goto err;
849 		m->class = *f[4];
850 		*l = m;
851 		l = &(m->next);
852 	}
853 	Bterm(b);
854 	return;
855 err:
856 	if(m == nil)
857 		return;
858 	free(m->ext);
859 	free(m->gtype);
860 	free(m->stype);
861 	free(m);
862 	Bterm(b);
863 }
864 
865 /*
866  *  if the class is 'm' or 'y', accept it
867  *  if the class is 'p' check a previous extension
868  *  otherwise, filename is bad
869  */
870 static int
badfile(char * name)871 badfile(char *name)
872 {
873 	char *p;
874 	Mtype *m;
875 	int rv;
876 
877 	p = strrchr(name, '.');
878 	if(p == nil)
879 		return 0;
880 
881 	for(m = mtypes; m != nil; m = m->next)
882 		if(cistrcmp(p, m->ext) == 0){
883 			switch(m->class){
884 			case 'm':
885 			case 'y':
886 				return 0;
887 			case 'p':
888 				*p = 0;
889 				rv = badfile(name);
890 				*p = '.';
891 				return rv;
892 			case 'r':
893 				return 2;
894 			}
895 		}
896 	return 1;
897 }
898 
899 /*
900  *  if the class is 'm' or 'y' or 'p', accept it
901  *  otherwise, filename is bad
902  */
903 static int
badtype(char * type)904 badtype(char *type)
905 {
906 	Mtype *m;
907 	char *s, *fix;
908 	int rv = 1;
909 
910 	fix = s = strchr(type, '/');
911 	if(s != nil)
912 		*s++ = 0;
913 	else
914 		s = "-";
915 
916 	for(m = mtypes; m != nil; m = m->next){
917 		if(cistrcmp(type, m->gtype) != 0)
918 			continue;
919 		if(cistrcmp(s, m->stype) != 0)
920 			continue;
921 		switch(m->class){
922 		case 'y':
923 		case 'p':
924 		case 'm':
925 			rv = 0;
926 			break;
927 		}
928 		break;
929 	}
930 
931 	if(fix != nil)
932 		*fix = '/';
933 	return rv;
934 }
935 
/*
 *  rfc2047 non-ascii: character sets understood in encoded
 *  words.  convert selects what tokenconvert() does with the
 *  decoded bytes: 0 copies them, 1 runs them through
 *  latin1toutf().
 */
typedef struct Charset Charset;
struct Charset {
	char	*name;		/* character set name */
	int	len;		/* length of name */
	int	convert;	/* conversion selector */
} charsets[] =
{
	{ "us-ascii",	sizeof("us-ascii")-1,	1 },
	{ "utf-8",	sizeof("utf-8")-1,	0 },
	{ "iso-8859-1",	sizeof("iso-8859-1")-1,	1 },
};
948 
949 /*
950  *  convert to UTF if need be
951  */
952 static String*
tokenconvert(String * t)953 tokenconvert(String *t)
954 {
955 	String *s;
956 	char decoded[1024];
957 	char utfbuf[2*1024];
958 	int i, len;
959 	char *e;
960 	char *token;
961 
962 	token = s_to_c(t);
963 	len = s_len(t);
964 
965 	if(token[0] != '=' || token[1] != '?' ||
966 	   token[len-2] != '?' || token[len-1] != '=')
967 		goto err;
968 	e = token+len-2;
969 	token += 2;
970 
971 	/* bail if we don't understand the character set */
972 	for(i = 0; i < nelem(charsets); i++)
973 		if(cistrncmp(charsets[i].name, token, charsets[i].len) == 0)
974 		if(token[charsets[i].len] == '?'){
975 			token += charsets[i].len + 1;
976 			break;
977 		}
978 	if(i >= nelem(charsets))
979 		goto err;
980 
981 	/* bail if it doesn't fit */
982 	if(strlen(token) > sizeof(decoded)-1)
983 		goto err;
984 
985 	/* bail if we don't understand the encoding */
986 	if(cistrncmp(token, "b?", 2) == 0){
987 		token += 2;
988 		len = dec64((uchar*)decoded, sizeof(decoded), token, e-token);
989 		decoded[len] = 0;
990 	} else if(cistrncmp(token, "q?", 2) == 0){
991 		token += 2;
992 		len = decquoted(decoded, token, e);
993 		if(len > 0 && decoded[len-1] == '\n')
994 			len--;
995 		decoded[len] = 0;
996 	} else
997 		goto err;
998 
999 	s = nil;
1000 	switch(charsets[i].convert){
1001 	case 0:
1002 		s = s_copy(decoded);
1003 		break;
1004 	case 1:
1005 		s = s_new();
1006 		latin1toutf(utfbuf, decoded, decoded+len);
1007 		s_append(s, utfbuf);
1008 		break;
1009 	}
1010 
1011 	return s;
1012 err:
1013 	return s_clone(t);
1014 }
1015 
1016 /*
1017  *  decode quoted
1018  */
1019 enum
1020 {
1021 	Self=	1,
1022 	Hex=	2,
1023 };
1024 uchar	tableqp[256];
1025 
1026 static void
initquoted(void)1027 initquoted(void)
1028 {
1029 	int c;
1030 
1031 	memset(tableqp, 0, 256);
1032 	for(c = ' '; c <= '<'; c++)
1033 		tableqp[c] = Self;
1034 	for(c = '>'; c <= '~'; c++)
1035 		tableqp[c] = Self;
1036 	tableqp['\t'] = Self;
1037 	tableqp['='] = Hex;
1038 }
1039 
/*
 *  value of a hexadecimal digit character, either case;
 *  0 for any character that is not a hex digit
 */
static int
hex2int(int x)
{
	int lower;

	if(x >= '0' && x <= '9')
		return x - '0';

	/* setting bit 0x20 folds 'A'..'F' onto 'a'..'f' */
	lower = x | 0x20;
	if((x >= 'A' && x <= 'F') || (x >= 'a' && x <= 'f'))
		return lower - 'a' + 10;
	return 0;
}
1051 
1052 static char*
decquotedline(char * out,char * in,char * e)1053 decquotedline(char *out, char *in, char *e)
1054 {
1055 	int c, soft;
1056 
1057 	/* dump trailing white space */
1058 	while(e >= in && (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\n'))
1059 		e--;
1060 
1061 	/* trailing '=' means no newline */
1062 	if(*e == '='){
1063 		soft = 1;
1064 		e--;
1065 	} else
1066 		soft = 0;
1067 
1068 	while(in <= e){
1069 		c = (*in++) & 0xff;
1070 		switch(tableqp[c]){
1071 		case Self:
1072 			*out++ = c;
1073 			break;
1074 		case Hex:
1075 			c = hex2int(*in++)<<4;
1076 			c |= hex2int(*in++);
1077 			*out++ = c;
1078 			break;
1079 		}
1080 	}
1081 	if(!soft)
1082 		*out++ = '\n';
1083 	*out = 0;
1084 
1085 	return out;
1086 }
1087 
1088 static int
decquoted(char * out,char * in,char * e)1089 decquoted(char *out, char *in, char *e)
1090 {
1091 	char *p, *nl;
1092 
1093 	if(tableqp[' '] == 0)
1094 		initquoted();
1095 
1096 	p = out;
1097 	while((nl = strchr(in, '\n')) != nil && nl < e){
1098 		p = decquotedline(p, in, nl);
1099 		in = nl + 1;
1100 	}
1101 	if(in < e)
1102 		p = decquotedline(p, in, e-1);
1103 
1104 	/* make sure we end with a new line */
1105 	if(*(p-1) != '\n'){
1106 		*p++ = '\n';
1107 		*p = 0;
1108 	}
1109 
1110 	return p - out;
1111 }
1112 
1113 /* translate latin1 directly since it fits neatly in utf */
1114 static int
latin1toutf(char * out,char * in,char * e)1115 latin1toutf(char *out, char *in, char *e)
1116 {
1117 	Rune r;
1118 	char *p;
1119 
1120 	p = out;
1121 	for(; in < e; in++){
1122 		r = (*in) & 0xff;
1123 		p += runetochar(p, &r);
1124 	}
1125 	*p = 0;
1126 	return p - out;
1127 }
1128