xref: /inferno-os/appl/lib/rfc822.b (revision 0e96539ff7cff23233d3f0a64bb285b385a3a1f4)
1implement RFC822;
2
3include "sys.m";
4	sys: Sys;
5
6include "bufio.m";
7	bufio: Bufio;
8	Iobuf: import bufio;
9
10include "rfc822.m";
11
12include "string.m";
13	str: String;
14
15include "daytime.m";
16	daytime: Daytime;
17	Tm: import daytime;
18
Minrequest: con 512;	# initial size of the readheaders buffer; more than enough for most requests
20
# one entry of the suffix table, built by parsesuffix from a line of SuffixFile
Suffix: adt {
	suffix: string;	# file name suffix; suffixclass compares it with the text suffix() returns, which starts at the "."
	generic: string;	# first part of the content type; "" when the table has "-"
	specific: string;	# second part of the content type; "" when the table has "-"
	encoding: string;	# content encoding implied by the suffix; "" when the table has "-"
};
27
SuffixFile: con "/lib/mimetype";	# table read by readsuffixfile
mtime := 0;	# not referenced by the code visible in this file
qid: Sys->Qid;	# not referenced by the code visible in this file

suffixes: list of ref Suffix;	# entries loaded by readsuffixfile and searched by suffixclass
33
# nomod raises an exception saying that the module at path s could not be
# loaded; %r appends the system error string.
nomod(s: string)
{
	msg := sys->sprint("internal: can't load %s: %r", s);
	raise msg;
}
38
# init must be called before anything else in the module.  It records the
# caller's Bufio module b, loads Sys, String and Daytime (raising through
# nomod if String or Daytime cannot be loaded) and then reads the suffix
# table.  The result of readsuffixfile is ignored, so a missing table is
# not treated as an error here.
init(b: Bufio)
{
	bufio = b;
	sys = load Sys Sys->PATH;
	if((str = load String String->PATH) == nil)
		nomod(String->PATH);
	if((daytime = load Daytime Daytime->PATH) == nil)
		nomod(Daytime->PATH);
	readsuffixfile();
}
51
# readheaders reads header lines from fd up to and including the first
# empty line (or end of input) and returns them in order as (name, value)
# pairs.  name is the lower-cased text before the first ':' ("" if the line
# has none); value is the rest of the line, including its final newline.
# CR LF is stored as LF.  A line that starts with a space or tab continues
# the previous one: the newline between them is replaced by a space and the
# leading space or tab is dropped.
# Returns nil if nothing was read, or if the buffer would have to grow when
# it is already at least limit bytes long.
readheaders(fd: ref Iobuf, limit: int): array of (string, array of byte)
{
	buf := array[Minrequest] of byte;
	used := 0;	# bytes stored in buf so far
	start := 0;	# offset in buf of the header line being collected
	count := 0;	# number of lines pushed on hdrs
	hdrs: list of array of byte;	# completed lines, most recent first

	for(;;){
		c := fd.getb();
		if(c < 0)
			break;
		if(c == '\r'){
			# CR LF collapses to LF; a CR followed by anything else is kept
			nc := fd.getb();
			if(nc < 0)
				break;
			if(nc == '\n')
				c = nc;
			else
				fd.ungetb();
		}
		if(used >= len buf){
			if(len buf >= limit)
				return nil;
			bigger := array[used+512] of byte;
			bigger[0:] = buf;
			buf = bigger;
		}
		buf[used++] = byte c;
		if(c != '\n')
			continue;
		if(used == 1 || buf[used-2] == byte '\n')
			break;	# empty line ends the headers
		peek := fd.getb();
		if(peek < 0)
			break;
		if(peek == ' ' || peek == '\t')
			buf[used-1] = byte ' ';	# continuation: join with the next line
		else{
			fd.ungetb();
			hdrs = buf[start: used] :: hdrs;
			count++;
			start = used;
		}
	}
	if(used == 0)
		return nil;
	if(start != used){
		# whatever follows the last completed line (including the empty line)
		hdrs = buf[start: used] :: hdrs;
		count++;
	}

	# hdrs is in reverse order, so fill the result from the end
	result := array[count] of (string, array of byte);
	for(slot := count; hdrs != nil; hdrs = tl hdrs){
		line := hd hdrs;
		name := "";
		for(k := 0; k < len line; k++){
			if(line[k] != byte ':')
				continue;
			name = str->tolower(string line[0:k]);
			line = line[k+1:];
			break;
		}
		result[--slot] = (name, line);
	}
	return result;
}
113
114#
115# *(";" parameter) used in transfer-extension, media-type and media-range
116# parameter = attribute "=" value
117# attribute = token
118# value = token | quoted-string
119#
# parseparams reads attribute=value pairs separated by ';' from ps and
# returns them in the order they appeared.  It stops at the first token that
# does not fit the grammar and pushes that token back for the caller.
parseparams(ps: ref Rfclex): list of (string, string)
{
	params: list of (string, string);
	for(;;){
		if(ps.lex() != Word)
			break;
		name := ps.wordval;
		if(ps.lex() != '=')
			break;
		t := ps.lex();
		if(t != Word && t != QString)
			break;
		params = (name, ps.wordval) :: params;
		if(ps.lex() != ';')
			break;
	}
	ps.unlex();
	return rev(params);
}
134
135#
136# 1#transfer-coding
137#
# mimefields reads a comma-separated list of words, each optionally followed
# by ';' and parameters, and returns (word, parameters) pairs in the order
# they appeared.  The token that ends the list is pushed back.
mimefields(ps: ref Rfclex): list of (string, list of (string, string))
{
	acc: list of (string, list of (string, string));
	for(;;){
		if(ps.lex() == Word){
			coding := ps.wordval;
			params: list of (string, string) = nil;
			if(ps.lex() == ';'){
				params = parseparams(ps);
				ps.lex();
			}
			acc = (coding, params) :: acc;
		}
		if(ps.tok != ',')
			break;
	}
	ps.unlex();

	# acc was built back to front; turn it round
	fields: list of (string, list of (string, string));
	while(acc != nil){
		fields = hd acc :: fields;
		acc = tl acc;
	}
	return fields;
}
157
158#	#(media-type | (media-range [accept-params]))	; Content-Type and Accept
159#
160#       media-type     = type "/" subtype *( ";" parameter )
161#       type           = token
162#       subtype        = token
163#	LWS must not be used between type and subtype, nor between attribute and value (in parameter)
164#
#	media-range = ("*/*" | type "/*" | type "/" subtype ) *(";" parameter)
166#    	accept-params  = ";" "q" "=" qvalue *( accept-extension )
167#	accept-extension = ";" token [ "=" ( token | quoted-string ) ]
168#
169#	1#( ( charset | "*" )[ ";" "q" "=" qvalue ] )		; Accept-Charset
170#	1#( codings [ ";" "q" "=" qvalue ] )			; Accept-Encoding
171#	1#( language-range [ ";" "q" "=" qvalue ] )		; Accept-Language
172#
173#	codings = ( content-coding | "*" )
174#
# parsecontent reads a comma-separated list of content descriptions from ps
# and pushes a Content for each onto head, which it returns (most recently
# read first).  If multipart is non-zero each item must have the form
# generic/specific; otherwise an item is a single word (specific becomes
# "*"), and a "/" is accepted only when followed by "*".  Parsing stops at
# the first item that breaks these rules; the current token is pushed back.
parsecontent(ps: ref Rfclex, multipart: int, head: list of ref Content): list of ref Content
{
	for(;;){
		if(ps.lex() == Word){
			major := ps.wordval;
			minor := "*";
			if(ps.lex() == '/'){
				if(ps.lex() != Word)
					break;
				minor = ps.wordval;
				if(!multipart && minor != "*")
					break;
			}else{
				if(multipart)
					break;	# syntax error
				ps.unlex();
			}
			attrs: list of (string, string) = nil;
			if(ps.lex() == ';'){
				attrs = parseparams(ps);
				ps.lex();
			}
			# order reversed, but doesn't matter
			head = Content.mk(major, minor, attrs) :: head;
		}
		if(ps.tok != ',')
			break;
	}
	ps.unlex();
	return head;
}
202
# rev returns a new list holding the pairs of l in the opposite order.
rev(l: list of (string, string)): list of (string, string)
{
	out: list of (string, string);
	while(l != nil){
		out = hd l :: out;
		l = tl l;
	}
	return out;
}
210
# Rfclex.mk returns a new lexer reading from the bytes of a, with the
# current token set to '\n' and the end-of-file flag clear.
Rfclex.mk(a: array of byte): ref Rfclex
{
	lx := ref Rfclex;
	lx.eof = 0;
	lx.tok = '\n';
	lx.fd = bufio->aopen(a);
	return lx;
}
219
# Rfclex.getc returns the next byte of the input.  A negative result means
# no byte was available; it also sets ps.eof so that ungetc does nothing.
Rfclex.getc(ps: self ref Rfclex): int
{
	b := ps.fd.getb();
	if(b >= 0)
		return b;
	ps.eof = 1;
	return b;
}
227
# Rfclex.ungetc pushes back the byte last returned by getc, unless getc has
# already reported end of input.
Rfclex.ungetc(ps: self ref Rfclex)
{
	if(ps.eof)
		return;
	ps.fd.ungetb();
}
233
# Rfclex.lex makes the next token current and returns its code.  Tokens
# pushed back by unlex are returned first, most recent first; otherwise a
# new token is scanned from the input.
Rfclex.lex(ps: self ref Rfclex): int
{
	if(ps.seen == nil){
		ps.tok = lex1(ps, 0);
		return ps.tok;
	}
	(ps.tok, ps.wordval) = hd ps.seen;
	ps.seen = tl ps.seen;
	return ps.tok;
}
243
# Rfclex.unlex pushes the current token and its text back, so that the next
# call to lex returns them again.
Rfclex.unlex(ps: self ref Rfclex)
{
	saved := (ps.tok, ps.wordval);
	ps.seen = saved :: ps.seen;
}
248
# Rfclex.skipws runs the scanner in skip-white-space mode and returns the
# character it stopped at.  It reads the input directly and does not look at
# tokens pushed back by unlex.
Rfclex.skipws(ps: self ref Rfclex): int
{
	c := lex1(ps, 1);
	return c;
}
253
254#
255# rfc 2822/rfc 1521 lexical analyzer
256#
# lex1 scans the next token from ps and returns its code:
#	'\n'	end of line or end of input
#	Word	an unquoted word; its lower-cased text is left in ps.wordval
#	QString	a quoted string; its text, without the quotes, is in ps.wordval
#	otherwise the special character itself
# White space, NUL bytes and parenthesised comments (which may nest) are
# skipped.  If skipwhite is non-zero no token is built: the first
# significant character is pushed back and returned (a newline is still
# consumed).
#
# Every read is tested with < 0 rather than against Bufio->EOF alone, which
# matches Rfclex.getc and makes a read error end the scan instead of being
# treated as an ordinary character.
lex1(ps: ref Rfclex, skipwhite: int): int
{
	ps.wordval = nil;
	while((c := ps.getc()) >= 0){
		case c {
		'(' =>
			# comment: skip to the matching ')' or the end of the line
			level := 1;
			while((c = ps.getc()) >= 0 && c != '\n'){
				if(c == '\\'){
					# the character after a backslash is never a delimiter
					c = ps.getc();
					if(c < 0)
						return '\n';
					continue;
				}
				if(c == '(')
					level++;
				else if(c == ')' && --level == 0)
					break;
			}
		' ' or '\t' or '\r' or 0 =>
			;
		'\n' =>
			return '\n';
		')' or '<' or '>' or '[' or ']' or '@' or '/' or ',' or
		';' or ':' or '?' or '=' =>
			if(skipwhite)
				ps.ungetc();
			return c;

		'"' =>
			if(skipwhite){
				ps.ungetc();
				return c;
			}
			word(ps, "\"");
			ps.getc();		# skip the closing quote
			return QString;

		* =>
			ps.ungetc();
			if(skipwhite)
				return c;
			word(ps, "\"()<>@,;:/[]?={}\r\n \t");
			return Word;
		}
	}
	return '\n';
}
307
308# return the rest of an rfc 822 line, not including \r or \n
309# do not map to lower case
310
# Rfclex.line consumes input up to the next CR, LF or end of input and
# returns the text before it.  A backslash is dropped and the character
# after it is kept as is.  Case is preserved.  The text is also stored in
# ps.wordval, and ps.tok is set to '\n'.
#
# Reads are tested with < 0 rather than against Bufio->EOF alone, so a read
# error ends the line instead of being appended to it.
Rfclex.line(ps: self ref Rfclex): string
{
	s := "";
	while((c := ps.getc()) >= 0 && c != '\n' && c != '\r'){
		if(c == '\\'){
			c = ps.getc();
			if(c < 0)
				break;
		}
		s[len s] = c;
	}
	ps.tok = '\n';
	ps.wordval = s;
	return s;
}
326
# word collects characters from ps into ps.wordval until it meets one that
# appears in stop, or the input ends.  The stopping character is pushed
# back.  A backslash is dropped and the character after it is taken
# literally, even if it is in stop.  A CR is treated as a space, and the
# letters A-Z are mapped to lower case.
#
# Reads are tested with < 0 rather than against Bufio->EOF alone, so a read
# error ends the word instead of being appended to it.
word(ps: ref Rfclex, stop: string)
{
	w := "";
	while((c := ps.getc()) >= 0){
		if(c == '\r')
			c = ' ';
		if(c == '\\'){
			c = ps.getc();
			if(c < 0)
				break;
		}else if(str->in(c, stop)){
			ps.ungetc();
			break;
		}
		if(c >= 'A' && c <= 'Z')
			c += 'a' - 'A';
		w[len w] = c;
	}
	ps.wordval = w;
}
347
# readsuffixfile loads the suffix table from SuffixFile into suffixes.
# Returns nil on success, or a diagnostic if the file cannot be opened, in
# which case suffixes is left as it was.  Lines that parsesuffix rejects
# are skipped without comment.
#
# The table is built in a local list and only then stored, so calling this
# again replaces the table instead of appending a second copy.  Entries end
# up in the reverse of file order, as before.
readsuffixfile(): string
{
	iob := bufio->open(SuffixFile, Bufio->OREAD);
	if(iob == nil)
		return sys->sprint("cannot open %s: %r", SuffixFile);
	table: list of ref Suffix;
	while((line := iob.gets('\n')) != nil){
		(s, nil) := parsesuffix(line);
		if(s != nil)
			table = s :: table;
	}
	iob.close();
	suffixes = table;
	return nil;
}
360
# parsesuffix turns one line of the suffix table into a Suffix.  Text from
# '#' onwards is ignored.  The four fields are suffix, generic type,
# specific type and encoding, separated by white space; a field of "-" means
# empty.  Returns (entry, nil) for a usable line, (nil, nil) for a blank
# line or one that gives neither a complete type nor an encoding, and
# (nil, message) if there are fewer than four fields.
parsesuffix(line: string): (ref Suffix, string)
{
	(line, nil) = str->splitstrl(line, "#");
	if(line == nil)
		return (nil, nil);
	(nf, toks) := sys->tokenize(line, "\n\t ");
	if(nf == 0)
		return (nil, nil);
	if(nf < 4)
		return (nil, "too few fields");

	f := array[4] of string;
	for(i := 0; i < len f; i++){
		f[i] = hd toks;
		toks = tl toks;
	}
	# "-" stands for an empty type or encoding; the suffix itself is taken as is
	for(i = 1; i < len f; i++)
		if(f[i] == "-")
			f[i] = "";

	s := ref Suffix(f[0], f[1], f[2], f[3]);
	if(s.encoding != nil || (s.generic != nil && s.specific != nil))
		return (s, nil);
	return (nil, nil);
}
389
390#
391# classify by file suffix
392#
# suffixclass returns (type, encoding) for a file name, looking only at the
# part after the last '/'.  Suffixes are tried from the right-most inwards
# ("a.tar.gz" tries ".gz" and then ".tar"); the first table entry that
# supplies a type sets the type, and likewise for the encoding.  Either
# result is nil if nothing supplied it.
suffixclass(name: string): (ref Content, ref Content)
{
	typ, enc: ref Content;

	(nil, base) := str->splitstrr(name, "/");
	if(base != nil)
		name = base;

	ext: string;
	for(;;){
		(name, ext) = suffix(name);	# TO DO: match below is case sensitive
		if(ext == nil)
			break;
		for(l := suffixes; l != nil; l = tl l){
			s := hd l;
			if(s.suffix != ext)
				continue;
			if(typ == nil && s.generic != nil)
				typ = Content.mk(s.generic, s.specific, nil);
			if(enc == nil && s.encoding != nil)
				enc = Content.mk(s.encoding, "", nil);
			if(typ != nil && enc != nil)
				break;
		}
	}
	return (typ, enc);
}
419
# suffix splits s at its last '.', returning (text before the dot, text from
# the dot onwards).  If s has no '.', it returns (s, nil).
suffix(s: string): (string, string)
{
	i := len s - 1;
	while(i >= 0 && s[i] != '.')
		i--;
	if(i < 0)
		return (s, nil);
	return (s[0: i], s[i:]);
}
427
428#
429#  classify by initial contents of file
430#
# dataclass guesses the content type of the bytes in a.  If every byte is
# either ASCII that is not a control character below 32 (newline, carriage
# return, tab, vertical tab and form feed are allowed) or part of a
# sequence that byte2char decodes without yielding Sys->UTFerror, the
# result is (text/plain, nil), with a charset=utf-8 parameter when any
# byte of 16r80 or above was seen.  Otherwise the result is (nil, nil).
dataclass(a: array of byte): (ref Content, ref Content)
{
	params: list of (string, string);
	sawutf := 0;
	i := 0;
	while(i < len a){
		c := int a[i];
		if(c >= 16r80){
			sawutf = 1;
			(r, n, nil) := sys->byte2char(a, i);
			if(r == Sys->UTFerror)
				return (nil, nil);
			i += n;
			continue;
		}
		if(c < 32 && c != '\n' && c != '\r' && c != '\t' && c != '\v' && c != '\f')
			return (nil, nil);
		i++;
	}
	if(sawutf)
		params = ("charset", "utf-8") :: nil;
	return (Content.mk("text", "plain", params), nil);
}
452
# Content.mk returns a new Content holding the given generic and specific
# parts and parameter list.
Content.mk(generic, specific: string, params: list of (string, string)): ref Content
{
	ct := ref Content;
	ct.params = params;
	ct.specific = specific;
	ct.generic = generic;
	return ct;
}
461
# Content.check returns 1 if me is acceptable according to oks, and 0
# otherwise.  An empty oks accepts anything.  An entry of oks accepts me if
# its generic part is "*" or equals me's (lower-cased), and me has no
# specific part, or the entry's specific part is "*" or equals me's
# (lower-cased).
Content.check(me: self ref Content, oks: list of ref Content): int
{
	if(oks == nil)
		return 1;
	g := str->tolower(me.generic);
	s := str->tolower(me.specific);
	while(oks != nil){
		cand := hd oks;
		oks = tl oks;
		if(cand.generic != g && cand.generic != "*")
			continue;
		if(s == nil || cand.specific == s || cand.specific == "*")
			return 1;
	}
	return 0;
}
476
# Content.text returns c in textual form: "generic/specific", or just
# "generic" when there is no specific part, followed by ";name=value" for
# each parameter, with the value quoted by quote() when it needs it.
Content.text(c: self ref Content): string
{
	s := c.generic;
	if(c.specific != nil)
		s = s+"/"+c.specific;
	for(l := c.params; l != nil; l = tl l){
		(name, val) := hd l;
		s += sys->sprint(";%s=%s", name, quote(val));
	}
	return s;
}
489
490#
491# should probably be in a Mime or HTTP module
492#
493
# characters that force a value to be written as a quoted string
Quotable: con "()<>@,;:\\\"/[]?={} \t";

# quotable returns 1 if s contains any character of Quotable, 0 otherwise.
quotable(s: string): int
{
	n := len s;
	i := 0;
	while(i < n){
		if(str->in(s[i], Quotable))
			return 1;
		i++;
	}
	return 0;
}
503
# quote returns s in a form that can be used as a parameter value.  A
# non-empty s with no character from Quotable is returned unchanged.
# Otherwise the result is s inside double quotes, with a backslash before
# each character that is in Quotable.
#
# An empty s is returned as "" (two quote characters): an empty value is
# not a token, so written bare it would produce "name=", which parseparams
# cannot read back.
quote(s: string): string
{
	if(s != nil && !quotable(s))
		return s;
	q := "\"";
	for(i := 0; i < len s; i++){
		if(str->in(s[i], Quotable))
			q[len q] = '\\';
		q[len q] = s[i];
	}
	q[len q] = '"';
	return q;
}
517
# day names used by sec2date, indexed by Tm.wday; entry 0 is "Sun"
weekdays := array[] of {
	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};

# month names used by sec2date, indexed by Tm.mon; entry 0 is "Jan"
months := array[] of {
	"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
525
526# print dates in the format
527# Wkd, DD Mon YYYY HH:MM:SS GMT
528
# sec2date formats t, a time in seconds since the epoch, as GMT text in the
# form shown above.
sec2date(t: int): string
{
	tm := daytime->gmt(t);
	day := weekdays[tm.wday];
	mon := months[tm.mon];
	year := tm.year+1900;
	return sys->sprint("%s, %.2d %s %.4d %.2d:%.2d:%.2d GMT",
		day, tm.mday, mon, year, tm.hour, tm.min, tm.sec);
}
536
537# parse dates of formats
538# Wkd, DD Mon YYYY HH:MM:SS GMT
539# Weekday, DD-Mon-YY HH:MM:SS GMT
540# Wkd Mon ( D|DD) HH:MM:SS YYYY
541# plus anything similar
542
# date2sec converts date to seconds since the epoch.  It returns 0 if the
# text cannot be parsed, if the year is before 1970 (tm.year counts from
# 1900), or if the time zone is not "GMT".
date2sec(date: string): int
{
	tm := daytime->string2tm(date);
	if(tm == nil)
		return 0;
	if(tm.year < 70 || tm.zone != "GMT")
		return 0;
	return daytime->tm2epoch(tm);
}
552
# now returns the current time as reported by daytime->now().
now(): int
{
	t := daytime->now();
	return t;
}
557
# time returns the current time formatted by sec2date.
time(): string
{
	t := daytime->now();
	return sec2date(t);
}
562