xref: /inferno-os/appl/svc/httpd/parser.b (revision 35f503c642e9dd127a2b989e4e12a10691cba3d4)
1implement Parser;
2
3include "sys.m";
4	sys: Sys;
5include "draw.m";
6	draw: Draw;
7include "bufio.m";
8include "string.m";
9	str: String;
10include "daytime.m";
11	daytime: Daytime;
12include "contents.m";
13	contents : Contents;
14	Content: import contents;
15include "cache.m";
16include "httpd.m";
17	Private_info: import Httpd;
18	Internal, TempFail, Unimp, UnkVers, BadCont, BadReq, Syntax,
19	BadSearch, NotFound, NoSearch , OnlySearch, Unauth, OK : import Httpd;
20include "parser.m";
21include "date.m";
22	date : Date;
23include "alarms.m";
24	alarms: Alarms;
25	Alarm: import alarms;
26include "lock.m";
27	locks: Lock;
28	Semaphore: import locks;
29
# One row of the errormsg table consulted by fail().
Error: adt {
	num:	string;			# status code and reason phrase sent on the HTTP status line
	concise, verbose:	string;	# short title; long explanation (may hold one %s for the object)
};
35
# Table of HTTP failure descriptions, indexed by the reason codes imported
# from Httpd.  fail() sends `num` on the status line, shows `concise` as the
# page title, and substitutes its message argument for the single "%s"
# (if any) in `verbose`.
errormsg := array[] of {
	Internal => Error("500 Internal Error", "Internal Error",
		"This server could not process your request due to an internal error."),
	TempFail =>	Error("500 Internal Error", "Temporary Failure",
		"The object %s is currently inaccessible.<p>Please try again later."),
	Unimp =>	Error("501 Not implemented", "Command not implemented",
		"This server does not implement the %s command."),
	UnkVers =>	Error("501 Not Implemented", "Unknown http version",
		"This server does not know how to respond to http version %s."),
	BadCont =>	Error("501 Not Implemented", "Impossible format",
		"This server cannot produce %s in any of the formats your client accepts."),
	BadReq =>	Error("400 Bad Request", "Strange Request",
		"Your client sent a query that this server could not understand."),
	Syntax =>	Error("400 Bad Request", "Garbled Syntax",
		"Your client sent a query with incoherent syntax."),
	BadSearch =>Error("400 Bad Request", "Inapplicable Search",
		"Your client sent a search that cannot be applied to %s."),
	NotFound =>Error("404 Not Found", "Object not found",
		"The object %s does not exist on this server."),
	NoSearch =>	Error("403 Forbidden", "Search not supported",
		"The object %s does not support the search command."),
	OnlySearch =>Error("403 Forbidden", "Searching Only",
		"The object %s only supports the searching methods."),
	Unauth =>	Error("401 Unauthorized", "Unauthorized",
		"You are not authorized to see the object %s."),
	OK =>	Error("200 OK", "everything is fine","Groovy man"),
};
63
# Report on standard error that module path `p` could not be loaded
# (%r appends the system error string), then raise "fail:bad module".
badmodule(p: string)
{
	# terminate the diagnostic with a newline so it does not run into later output
	sys->fprint(sys->fildes(2), "parse: cannot load %s: %r\n", p);
	raise "fail:bad module";
}
69
# Semaphore created by init() and held by logit() around each log write.
lock: ref Semaphore;

# Load every module the parser uses and initialise the ones that need it.
# A module that fails to load is reported through badmodule(), which raises.
init()
{
	sys = load Sys Sys->PATH;

	date = load Date Date->PATH;
	if(date == nil)
		badmodule(Date->PATH);

	daytime = load Daytime Daytime->PATH;
	if(daytime == nil)
		badmodule(Daytime->PATH);

	contents = load Contents Contents->PATH;
	if(contents == nil)
		badmodule(Contents->PATH);

	str = load String String->PATH;
	if(str == nil)
		badmodule(String->PATH);

	alarms = load Alarms Alarms->PATH;
	if(alarms == nil)
		badmodule(Alarms->PATH);

	locks = load Lock Lock->PATH;
	if(locks == nil)
		badmodule(Lock->PATH);

	# module-level set-up, in the same order as before
	locks->init();
	lock = Semaphore.new();
	date->init();
}
97
# Close the connection's buffered input and output (when present),
# clear both references in g, and terminate the calling thread.
atexit(g: ref Private_info)
{
	if(g.dbg_log != nil)
		sys->fprint(g.dbg_log,"At exit from parse, closing fds. \n");

	if(g.bin != nil){
		g.bufio->g.bin.close();
		g.bin = nil;
	}
	if(g.bout != nil){
		g.bufio->g.bout.close();
		g.bout = nil;
	}
	exit;
}
111
112
# Read the request's header lines, dispatching each "name:" through
# parsejump().  Nothing is read when vers is the empty string.
httpheaders(g: ref Private_info,vers : string)
{
	if(vers == "")
		return;

	# start as if a newline was just seen, so an immediate blank line ends the headers
	g.tok = '\n';

	# alarm armed for 15 minutes while the headers are read
	timer := Alarm.alarm(15*1000*60);
	for(;;){
		if(lex(g) == '\n')
			break;
		if(g.tok == Word && lex(g) == ':'){
			if(g.dbg_log != nil)
				sys->fprint(g.dbg_log,"hitting parsejump. wordval is %s\n",
					g.wordval);
			parsejump(g,g.wordval);
		}
		# discard whatever is left of this header line
		while(g.tok != '\n')
			lex(g);
	}
	timer.stop();
}
132
133
# Parse a comma-separated list of "generic/specific;param=value" items from
# the header currently being lexed and prepend one Content per item to head.
#
#	name		header name, used only in log messages
#	multipart	non-zero: the "/specific" part is mandatory;
#			zero: a missing specific part defaults to "*"
#	head		list built so far
#
# Returns the extended list; on a syntax error returns what was collected
# up to that point.  The only parameter understood is "q", whose value is
# stored in the item's q field; any other parameter is logged.
mimeok(g: ref Private_info,name : string,multipart : int,head : list of ref Content): list of ref Content
{

	generic, specific, s : string;

	# skip separators until the first word of the next item
	while(lex(g) != Word)
		if(g.tok != ',')
			return head;

	generic = g.wordval;
	lex(g);
	if(g.tok == '/' || multipart){
		if(g.tok != '/')
			return head;
		if(lex(g) != Word)
			return head;
		specific = g.wordval;
		lex(g);
	}else
		specific = "*";
	tmp := contents->mkcontent(generic, specific);
	head = tmp::head;
	for(;;){
		case g.tok {
		';' =>
			if(lex(g) == Word){
				s = g.wordval;
				if(lex(g) != '=' || lex(g) != Word)
					return head;
				if(s=="q")
					# use the value the client sent rather than a fixed placeholder
					tmp.q = real g.wordval;
				else
					logit(g,sys->sprint(
						"unknown %s param: %s %s",
						name, s, g.wordval));
			}
			break;
		',' =>
			return  mimeok(g,name, multipart,head);
		* =>
			return head;
		}
		lex(g);
	}
	return head;
}
182
# "accept" header: extend g.oktype; the type/subtype form is required (multipart = 1).
mimeaccept(g: ref Private_info,name : string)
{
	types := mimeok(g, name, 1, g.oktype);
	g.oktype = types;
}
187
# "accept-encoding" header: extend g.okencode; single-word items allowed (multipart = 0).
mimeacceptenc(g: ref Private_info,name : string)
{
	encodings := mimeok(g, name, 0, g.okencode);
	g.okencode = encodings;
}
192
# "accept-language" header: extend g.oklang; single-word items allowed (multipart = 0).
mimeacceptlang(g: ref Private_info,name : string)
{
	languages := mimeok(g, name, 0, g.oklang);
	g.oklang = languages;
}
197
# "if-modified-since" header: convert the rest of the line with
# date->date2sec and store it in g.modtime.  A result of 0 is logged
# together with the text that produced it.
mimemodified(g: ref Private_info,name : string)
{
	lexhead(g);
	g.modtime = date->date2sec(g.wordval);

	if(g.dbg_log != nil)
		sys->fprint(g.dbg_log,"modtime %d\n",g.modtime);

	if(g.modtime != 0)
		return;
	logit(g,sys->sprint("%s: %s", name, g.wordval));
}
208
209
# "user-agent" header: keep the rest of the line, case preserved, in g.client.
mimeagent(g: ref Private_info,nil : string)
{
	lexhead(g);
	agent := g.wordval;
	g.client = agent;
}
215
# "from" header: consume the rest of the line; the value is not stored.
mimefrom(g: ref Private_info, nil: string)
{
	lexhead(g);
}
220
221
# "host" header: store in g.host the part of the line that follows its
# last space or tab (the whole line if it contains neither).
mimehost(g: ref Private_info,nil : string)
{
	lexhead(g);
	(nil, tail) := str->splitr(g.wordval, " \t");
	g.host = tail;
}
229
# "referer" header: store in g.referer the part of the line that follows
# its last space or tab.
mimereferer(g: ref Private_info,nil : string)
{
	lexhead(g);
	(nil, tail) := str->splitr(g.wordval, " \t");
	g.referer = tail;
}
237
# "content-length" header: convert the part of the line after its last
# space or tab to an integer and store it in g.clength.
mimeclength(g: ref Private_info,nil : string)
{
	lexhead(g);
	(nil, digits) := str->splitr(g.wordval, " \t");
	g.clength = int digits;
}
245
# "content-type" header: store in g.ctype the part of the line that
# follows its last space or tab.
mimectype(g: ref Private_info,nil : string)
{
	lexhead(g);
	(nil, tail) := str->splitr(g.wordval, " \t");
	g.ctype = tail;
}
253
254
# Recognised header whose value is not used: consume the rest of the line.
mimeignore(g: ref Private_info, nil: string)
{
	lexhead(g);
}
259
260
# Unrecognised header: consume the line and log it, naming the client
# when a user-agent has already been recorded in g.client.
mimeunknown(g: ref Private_info,name : string)
{
	lexhead(g);
	if(g.client == ""){
		logit(g,sys->sprint("ignoring header %s: %s", name, g.wordval));
		return;
	}
	logit(g,sys->sprint("agent %s: ignoring header %s: %s ",
		g.client, name, g.wordval));
}
270
271
# Dispatch on header name k to the routine that parses its value.
# httpheaders() passes the name as produced by word(), i.e. lower-cased.
parsejump(g: ref Private_info,k : string)
{
	case k {
	# content negotiation
	"accept" =>
		mimeaccept(g,k);
	"accept-encoding" =>
		mimeacceptenc(g,k);
	"accept-language" =>
		mimeacceptlang(g,k);

	# values recorded in g
	"if-modified-since" =>
		mimemodified(g,k);
	"user-agent" =>
		mimeagent(g,k);
	"host" =>
		mimehost(g,k);
	"referer" =>
		mimereferer(g,k);
	"content-length" =>
		mimeclength(g,k);
	"content-type" =>
		mimectype(g,k);

	# read and dropped
	"from" =>
		mimefrom(g,k);
	"authorization" or "chargeto" or "connection" or "forwarded" or
	"pragma" or "proxy-agent" or "proxy-connection" or
	"x-afs-tokens" or "x-serial-number" =>
		mimeignore(g,k);

	# anything else is logged
	* =>
		mimeunknown(g,k);
	}
}
304
# Fetch the next token with lex1(), remember it in g.tok and return it.
# lex1() still sees the previous token in g.tok while it runs.
lex(g: ref Private_info): int
{
	t := lex1(g);
	g.tok = t;
	return t;
}
310
311
# rfc 822/rfc 1521 lexical analyzer
#
# Returns the next token of the header being read:
#	Word	a word or quoted string, left in g.wordval by word()
#	'\n'	end of a header line, or end of input (g.parse_eof set)
#	one of ) < > [ ] @ / , ; : ? =	returned as itself
# Blanks, tabs, carriage returns and parenthesised comments are skipped.
# A newline followed by a blank or tab is a continuation and is skipped too.
lex1(g: ref Private_info): int
{
	level, c : int;
	if(g.parse_eof)
		return '\n';

	for(;;){
		c = getc(g);
		case c {
		'(' =>
			# comment: skip to the matching ')', honouring nesting
			level = 1;
			while((c = getc(g)) != Bufio->EOF){
				if(c == '\\'){
					# backslash quotes the next character
					c = getc(g);
					if(c == Bufio->EOF)
						return '\n';
					continue;
				}
				if(c == '(')
					level++;
				else if(c == ')'){
					# every ')' closes one level; the comment ends at depth 0
					level--;
					if(level == 0)
						break;
				}else if(c == '\n'){
					c = getc(g);
					if(c == Bufio->EOF)
						return '\n';
					# only a blank or tab continues the header onto the next line;
					# anything else starts a new line, so give it back
					if(c != ' ' && c != '\t'){
						ungetc(g);
						return '\n';
					}
				}
			}
		' ' or '\t' or '\r' =>
			break;
		'\n' =>
			# two newline tokens in a row: blank line, end of the headers
			if(g.tok == '\n'){
				g.parse_eof = 1;
				return '\n';
			}
			c = getc(g);
			if(c == Bufio->EOF)
				return '\n';
			if(c != ' ' && c != '\t'){
				ungetc(g);
				return '\n';
			}
		')' or '<' or '>' or '[' or ']' or '@' or '/' or ','
		or ';' or ':' or '?' or '=' =>
			return c;

		'"' =>
			word(g,"\"");
			getc(g);		# skip the closing quote
			return Word;

		* =>
			ungetc(g);
			word(g,"\"()<>@,;:/[]?=\r\n \t");
			return Word;
		}
	}
	return 0;
}
380
# Collect the rest of the current header line (continuation lines
# included) into g.wordval, without the terminating \r or \n and without
# mapping to lower case.  A backslash makes the next character literal.
# Leaves g.tok set to '\n'.

lexhead(g: ref Private_info)
{
	n := 0;
	for(;;){
		c := getc(g);
		if(c == Bufio->EOF)
			break;

		# line ends are resolved by the helpers: they yield a blank or tab
		# for a continuation line and '\n' for a real end of line
		if(c == '\r')
			c = wordcr(g);
		else if(c == '\n')
			c = wordnl(g);
		if(c == '\n')
			break;

		if(c == '\\'){
			c = getc(g);
			if(c == Bufio->EOF)
				break;
		}
		g.wordval[n] = c;
		n++;
	}
	g.wordval = g.wordval[0:n];
	g.tok = '\n';
}
405
# Collect characters into g.wordval until one from `stop` (which is pushed
# back) or end of input.  Upper-case ASCII letters are folded to lower
# case; a backslash makes the following character literal, so it is
# never treated as a stop character.
word(g: ref Private_info,stop : string)
{
	n := 0;
	for(;;){
		c := getc(g);
		if(c == Bufio->EOF)
			break;

		if(c == '\r')
			c = wordcr(g);
		else if(c == '\n')
			c = wordnl(g);

		if(c == '\\'){
			c = getc(g);
			if(c == Bufio->EOF)
				break;
		}else if(str->in(c,stop)){
			ungetc(g);
			break;
		}

		if(c >= 'A' && c <= 'Z')
			c += 'a' - 'A';
		g.wordval[n] = c;
		n++;
	}
	g.wordval = g.wordval[0:n];
}
431
432
# Called after a '\r' has been read.  If a '\n' follows, the pair is a
# line end and wordnl() decides the result; otherwise the character is
# pushed back and the lone '\r' is reported as a blank.
wordcr(g: ref Private_info): int
{
	if(getc(g) == '\n')
		return wordnl(g);
	ungetc(g);
	return ' ';
}
441
442
# Called after a '\n' has been read.  A following blank or tab marks a
# continuation line and is returned; any other character is pushed back
# and '\n' is returned.
wordnl(g: ref Private_info): int
{
	next := getc(g);
	if(next != ' ' && next != '\t'){
		ungetc(g);
		return '\n';
	}
	return next;
}
451
452
# Read one character from g.bin.  At end of input set g.parse_eof and
# return Bufio->EOF; otherwise return the character masked to 7 bits.
getc(g: ref Private_info): int
{
	c := g.bufio->g.bin.getc();
	if(c != Bufio->EOF)
		return c & 16r7f;
	g.parse_eof = 1;
	return c;
}
462
# Push the most recently read character back onto g.bin.
ungetc(g: ref Private_info)
{
	# hack carried over from the original author: this tacitly assumes
	# the characters read from the input are ASCII
	g.bufio->g.bin.ungetc();
}
469
# Convert a URL containing ASCII and %xx escapes to a Unicode string,
# allowing for UTF-8 that is already present unencoded.
# '+' and NUL become a space; a '%' not followed by two hex digits is
# copied through unchanged.

urlunesc(s : string): string
{
	n := len s;
	buf := array[Sys->UTFmax*n] of byte;
	used := 0;
	for(i := 0; i < n; i++){
		c := int s[i];
		if(c >= Runeself){
			# non-ASCII character: store its byte encoding
			used += sys->char2byte(c, buf, used);
			continue;
		}
		if(c == '%' && i+2 < n){
			high := hex(int s[i+1]);
			if(high >= 0){
				low := hex(int s[i+2]);
				if(low >= 0){
					# valid escape: consume both digits
					i += 2;
					c = high*16 + low;
				}
			}
		} else if(c == '+'  || c == 0)
			c = ' ';
		buf[used++] = byte c;
	}
	return string buf[0:used];
}
496
# Value (0-15) of the hexadecimal digit c, in either case, or -1 if c is
# not a hexadecimal digit.
hex(c: int): int
{
	v: int;
	case c {
	'0' to '9' =>
		v = c - '0';
	'a' to 'f' =>
		v = c - 'a' + 10;
	'A' to 'F' =>
		v = c - 'A' + 10;
	* =>
		v = -1;
	}
	return v;
}
507
# Replace the characters that are special in HTML text and attribute
# values (& < > ") with character entities.
htmlesc(s: string): string
{
	t := "";
	for(i := 0; i < len s; i++){
		case s[i] {
		'&' =>
			t += "&amp;";
		'<' =>
			t += "&lt;";
		'>' =>
			t += "&gt;";
		'"' =>
			t += "&quot;";
		* =>
			t[len t] = s[i];
		}
	}
	return t;
}

# write a failure message to the net and exit
#
#	reason	index into errormsg (one of the Httpd reason codes)
#	message	text shown on the page as the URL, and substituted for the
#		"%s" in the verbose description when it has one
#
# The page is sent only when g.bout is set and reason is not 2; the
# failure is logged in every case, and atexit() then ends the thread.
fail(g: ref Private_info,reason : int, message : string)
{
	err := errormsg[reason];

	# message comes from the request, so quote it before embedding it in HTML
	shown := htmlesc(message);

	title:=sys->sprint("<head><title>%s</title></head>\n<body bgcolor=#ffffff>\n",
					err.concise);
	body1:=	"<h1> Error </h1>\n<P>" +
		"Sorry, Charon is unable to process your request. The webserver reports"+
		" the following error <P><b>";
	#concise error
	body2:="</b><p>for the URL\n<P><b>";
	#message
	body3:="</b><P>with the following reason:\n<P><b>";
	#reason
	verb := err.verbose;
	if (str->in('%',verb)){
		# v2 starts at the '%'; skip the two characters of "%s"
		(v1,v2):=str->splitl(verb,"%");
		verb=v1+shown+v2[2:];
	}
	body4:="</b><hr> This Webserver powered by <img src=\"/inferno.gif\">. <P>"+
		"For more information click <a href=\"http://inferno.lucent.com\"> here </a>\n"+
		"<hr><address>\n";
	dtime:=sys->sprint("This information processed at %s.\n",daytime->time());
	body5:="</address>\n</body>\n";
	strbuf:=title+body1+err.concise+body2+shown+body3+
		verb+body4+dtime+body5;

	# NOTE(review): 2 is a bare reason code; which Httpd constant it stands
	# for is not visible here -- confirm and replace with the name
	if (g.bout!=nil && reason!=2){
		# Content-Length counts bytes on the wire, not characters
		nbytes := len array of byte strbuf;
		g.bufio->g.bout.puts(sys->sprint("%s %s\r\n", g.version, err.num));
		g.bufio->g.bout.puts(sys->sprint("Date: %s\r\n", daytime->time()));
		g.bufio->g.bout.puts("Server: Charon\r\n");
		g.bufio->g.bout.puts("MIME-version: 1.0\r\n");
		g.bufio->g.bout.puts("Content-Type: text/html\r\n");
		g.bufio->g.bout.puts(sys->sprint("Content-Length: %d\r\n", nbytes));
		g.bufio->g.bout.puts("\r\n");
		g.bufio->g.bout.puts(strbuf);
		g.bufio->g.bout.flush();
	}
	logit(g,sys->sprint("failing: %s", err.num));
	atexit(g);
}
548
549
# Write the opening of a successful reply to g.bout: the "200 OK" status
# line followed by the Server and MIME-version headers.  The header block
# is left open for the caller to continue.

okheaders(g: ref Private_info)
{
	status := sys->sprint("%s 200 OK\r\n", g.version);
	g.bufio->g.bout.puts(status);
	g.bufio->g.bout.puts("Server: Charon\r\n");
	g.bufio->g.bout.puts("MIME-version: 1.0\r\n");
}
558
# Write a complete "304 Not Modified" reply (status line, two headers and
# the blank line that ends them) to g.bout, then finish via atexit().
notmodified(g: ref Private_info)
{
	status := sys->sprint("%s 304 Not Modified\r\n", g.version);
	g.bufio->g.bout.puts(status);
	g.bufio->g.bout.puts("Server: Charon\r\n");
	g.bufio->g.bout.puts("MIME-version: 1.0\r\n\r\n");
	atexit(g);
}
566
# Append "remotesys message" as one line to g.logfile, holding the
# module-wide lock for the duration of the write.
logit(g: ref Private_info,message : string )
{
	entry := sys->sprint("%s %s\n", g.remotesys, message);
	lock.obtain();
	sys->fprint(g.logfile, "%s", entry);
	lock.release();
}
573
# Return p with characters that need it written as %XX escapes:
# NUL is dropped, characters at or above Runeself are escaped byte by
# byte from their char2byte encoding, and control characters, space and
# '%' are escaped directly.  Everything else is copied unchanged.
urlconv(p : string): string
{
	utf := array[Sys->UTFmax] of byte;
	res := "";
	n := len p;
	for(i := 0; i < n; i++){
		c := p[i];
		if(c == 0)
			continue;	# ignore nul bytes
		if(c < Runeself){
			if(c <= ' ' || c == '%')
				res += sys->sprint("%%%2.2X", c);
			else
				res[len res] = c;
			continue;
		}
		# multi-byte character: escape each encoded byte
		nb := sys->char2byte(c, utf, 0);
		for(j := 0; j < nb; j++)
			res += sys->sprint("%%%.2X", int utf[j]);
	}
	return res;
}
594