implement Parser;

#
# HTTP request-header parser and common response helpers for the
# Charon web server.  The lexer follows the rfc 822/rfc 1521 token rules.
#

include "sys.m";
	sys: Sys;
include "draw.m";
	draw: Draw;
include "bufio.m";
include "string.m";
	str: String;
include "daytime.m";
	daytime: Daytime;
include "contents.m";
	contents : Contents;
	Content: import contents;
include "cache.m";
include "httpd.m";
	Private_info: import Httpd;
	Internal, TempFail, Unimp, UnkVers, BadCont, BadReq, Syntax,
	BadSearch, NotFound, NoSearch , OnlySearch, Unauth, OK : import Httpd;
include "parser.m";
include "date.m";
	date : Date;
include "alarms.m";
	alarms: Alarms;
	Alarm: import alarms;
include "lock.m";
	locks: Lock;
	Semaphore: import locks;

# One entry of the error table used by fail().
#	num	status line text sent to the client
#	concise	short title for the error page
#	verbose	explanation; may contain one "%s" that fail() replaces
#		with the offending object
Error: adt {
	num : string;
	concise: string;
	verbose: string;
};

# Error table indexed by the failure codes imported from Httpd.
errormsg := array[] of {
	Internal => Error("500 Internal Error", "Internal Error",
		"This server could not process your request due to an internal error."),
	TempFail => Error("500 Internal Error", "Temporary Failure",
		"The object %s is currently inaccessible.<p>Please try again later."),
	Unimp => Error("501 Not implemented", "Command not implemented",
		"This server does not implement the %s command."),
	UnkVers => Error("501 Not Implemented", "Unknown http version",
		"This server does not know how to respond to http version %s."),
	BadCont => Error("501 Not Implemented", "Impossible format",
		"This server cannot produce %s in any of the formats your client accepts."),
	BadReq => Error("400 Bad Request", "Strange Request",
		"Your client sent a query that this server could not understand."),
	Syntax => Error("400 Bad Request", "Garbled Syntax",
		"Your client sent a query with incoherent syntax."),
	BadSearch =>Error("400 Bad Request", "Inapplicable Search",
		"Your client sent a search that cannot be applied to %s."),
	NotFound =>Error("404 Not Found", "Object not found",
		"The object %s does not exist on this server."),
	NoSearch => Error("403 Forbidden", "Search not supported",
		"The object %s does not support the search command."),
	OnlySearch =>Error("403 Forbidden", "Searching Only",
		"The object %s only supports the searching methods."),
	Unauth => Error("401 Unauthorized", "Unauthorized",
		"You are not authorized to see the object %s."),
	OK => Error("200 OK", "everything is fine","Groovy man"),
};

# Report a module that failed to load on standard error and abort
# initialisation by raising "fail:bad module".
badmodule(p: string)
{
	sys->fprint(sys->fildes(2), "parse: cannot load %s: %r\n", p);
	raise "fail:bad module";
}

# serialises writes to the log file, see logit()
lock: ref Semaphore;

# Load every module the parser depends on and create the log lock.
# Raises "fail:bad module" (via badmodule) if any load fails.
init()
{
	sys = load Sys Sys->PATH;

	date = load Date Date->PATH;
	if (date==nil) badmodule(Date->PATH);

	daytime = load Daytime Daytime->PATH;
	if(daytime == nil) badmodule(Daytime->PATH);

	contents = load Contents Contents->PATH;
	if(contents == nil) badmodule(Contents->PATH);

	str = load String String->PATH;
	if(str == nil) badmodule(String->PATH);

	alarms = load Alarms Alarms->PATH;
	if(alarms == nil) badmodule(Alarms->PATH);

	locks = load Lock Lock->PATH;
	if(locks == nil) badmodule(Lock->PATH);
	locks->init();
	lock = Semaphore.new();
	date->init();
}

# Close the connection's buffered input and output, drop the
# references to them, and terminate the calling thread.
atexit(g: ref Private_info)
{
	if (g.dbg_log!=nil){
		sys->fprint(g.dbg_log,"At exit from parse, closing fds. \n");
	}
	if (g.bin!=nil)
		g.bufio->g.bin.close();
	if (g.bout!=nil)
		g.bufio->g.bout.close();
	g.bin=nil;
	g.bout=nil;
	exit;
}


# Read the header block of a request and hand each "name:" header to
# parsejump().  Nothing is read when vers is empty.
httpheaders(g: ref Private_info,vers : string)
{
	if(vers == "")
		return;
	# pretend the previous token was a newline so that an immediately
	# following blank line is recognised as the end of the headers
	g.tok = '\n';
	# allow 15 minutes for the header block to arrive
	a := Alarm.alarm(15*1000*60);
	while(lex(g) != '\n'){
		if(g.tok == Word && lex(g) == ':'){
			if (g.dbg_log!=nil)
				sys->fprint(g.dbg_log,"hitting parsejump. wordval is %s\n",
					g.wordval);
			parsejump(g,g.wordval);
		}
		# discard whatever the handler left on the line
		while(g.tok != '\n')
			lex(g);
	}
	a.stop();
}


# Parse a comma separated list of "generic[/specific][;param=value]"
# items and push one Content per item onto head.
#	name		header name, used only in log messages
#	multipart	non-zero when "/specific" is mandatory
# Returns the extended list; parsing stops quietly at the first
# item that does not fit the grammar.
mimeok(g: ref Private_info,name : string,multipart : int,head : list of ref Content): list of ref Content
{

	generic, specific, s : string;

	# skip leading commas, give up on anything else
	while(lex(g) != Word)
		if(g.tok != ',')
			return head;

	generic = g.wordval;
	lex(g);
	if(g.tok == '/' || multipart){
		if(g.tok != '/')
			return head;
		if(lex(g) != Word)
			return head;
		specific = g.wordval;
		lex(g);
	}else
		specific = "*";
	tmp := contents->mkcontent(generic, specific);
	head = tmp::head;
	for(;;){
		case g.tok {
		';' =>
			if(lex(g) == Word){
				s = g.wordval;
				if(lex(g) != '=' || lex(g) != Word)
					return head;
				if(s=="q")
					# quality factor as sent by the client
					tmp.q = real g.wordval;
				else
					logit(g,sys->sprint(
						"unknown %s param: %s %s",
						name, s, g.wordval));
			}
		',' =>
			return mimeok(g,name, multipart,head);
		* =>
			return head;
		}
		lex(g);
	}
	return head;
}

# "accept" header: media types, "/subtype" required.
mimeaccept(g: ref Private_info,name : string)
{
	g.oktype = mimeok(g,name, 1, g.oktype);
}

# "accept-encoding" header.
mimeacceptenc(g: ref Private_info,name : string)
{
	g.okencode = mimeok(g,name, 0, g.okencode);
}

# "accept-language" header.
mimeacceptlang(g: ref Private_info,name : string)
{
	g.oklang = mimeok(g,name, 0, g.oklang);
}

# "if-modified-since" header: store the date in g.modtime and log the
# raw value when date2sec yields 0.
mimemodified(g: ref Private_info,name : string)
{
	lexhead(g);
	g.modtime = date->date2sec(g.wordval);
	if (g.dbg_log!=nil){
		sys->fprint(g.dbg_log,"modtime %d\n",g.modtime);
	}
	if(g.modtime == 0)
		logit(g,sys->sprint("%s: %s", name, g.wordval));
}


# "user-agent" header: remember the client name.
mimeagent(g: ref Private_info,nil : string)
{
	lexhead(g);
	g.client = g.wordval;
}

# "from" header: consumed and discarded.
mimefrom(g: ref Private_info,nil : string)
{
	lexhead(g);
}


# "host" header: keep what follows the last blank or tab.
mimehost(g: ref Private_info,nil : string)
{
	h : string;
	lexhead(g);
	(nil,h)=str->splitr(g.wordval," \t");
	g.host = h;
}

# "referer" header: keep what follows the last blank or tab.
mimereferer(g: ref Private_info,nil : string)
{
	h : string;
	lexhead(g);
	(nil,h)=str->splitr(g.wordval," \t");
	g.referer = h;
}

# "content-length" header: keep the value as an integer.
mimeclength(g: ref Private_info,nil : string)
{
	h : string;
	lexhead(g);
	(nil,h)=str->splitr(g.wordval," \t");
	g.clength = int h;
}

# "content-type" header: keep what follows the last blank or tab.
mimectype(g: ref Private_info,nil : string)
{
	h : string;
	lexhead(g);
	(nil,h)=str->splitr(g.wordval," \t");
	g.ctype = h;
}


# Known header that is deliberately not acted upon.
mimeignore(g: ref Private_info,nil : string)
{
	lexhead(g);
}


# Header that is not recognised: consume it and note it in the log.
mimeunknown(g: ref Private_info,name : string)
{
	lexhead(g);
	if(g.client!="")
		logit(g,sys->sprint("agent %s: ignoring header %s: %s ",
			g.client, name, g.wordval));
	else
		logit(g,sys->sprint("ignoring header %s: %s", name, g.wordval));
}


# Dispatch on the (lower-cased) header name k.
parsejump(g: ref Private_info,k : string)
{
	case k {

	"from" =>
		mimefrom(g,k);
	"if-modified-since" =>
		mimemodified(g,k);
	"accept" =>
		mimeaccept(g,k);
	"accept-encoding" =>
		mimeacceptenc(g,k);
	"accept-language" =>
		mimeacceptlang(g,k);
	"user-agent" =>
		mimeagent(g,k);
	"host" =>
		mimehost(g,k);
	"referer" =>
		mimereferer(g,k);
	"content-length" =>
		mimeclength(g,k);
	"content-type" =>
		mimectype(g,k);
	"authorization" or "chargeto" or "connection" or "forwarded" or
	"pragma" or "proxy-agent" or "proxy-connection" or
	"x-afs-tokens" or "x-serial-number" =>
		mimeignore(g,k);
	* =>
		mimeunknown(g,k);
	};
}

# Fetch the next token, record it in g.tok and return it.
lex(g: ref Private_info): int
{
	g.tok = lex1(g);
	return g.tok;
}


# rfc 822/rfc 1521 lexical analyzer
# Returns a special character, '\n' at the end of a header line (and
# from then on once the input is exhausted), or Word with the text
# in g.wordval.  Parenthesised comments and white space are skipped;
# a newline followed by a blank or tab continues the current line.
lex1(g: ref Private_info): int
{
	level, c : int;
	if(g.parse_eof)
		return '\n';

	for(;;){
		c = getc(g);
		if(c == Bufio->EOF)
			return '\n';
		case c {
		'(' =>
			# comment; comments nest, so count the parentheses
			level = 1;
			while((c = getc(g)) != Bufio->EOF){
				if(c == '\\'){
					# quoted character, never special
					c = getc(g);
					if(c == Bufio->EOF)
						return '\n';
					continue;
				}
				if(c == '(')
					level++;
				else if(c == ')'){
					level--;
					if(level == 0)
						break;
				}else if(c == '\n'){
					c = getc(g);
					if(c == Bufio->EOF)
						return '\n';
					# a line that does not start with white
					# space ends the header and the comment
					if(c != ' ' && c != '\t'){
						ungetc(g);
						return '\n';
					}
				}
			}
			if(c == Bufio->EOF)
				return '\n';
		' ' or '\t' or '\r' =>
			continue;
		'\n' =>
			# two newlines in a row: end of the header block
			if(g.tok == '\n'){
				g.parse_eof = 1;
				return '\n';
			}
			c = getc(g);
			if(c == Bufio->EOF)
				return '\n';
			if(c != ' ' && c != '\t'){
				ungetc(g);
				return '\n';
			}
		')' or '<' or '>' or '[' or ']' or '@' or '/' or ','
		or ';' or ':' or '?' or '=' =>
			return c;

		'"' =>
			word(g,"\"");
			getc(g);	# skip the closing quote
			return Word;

		* =>
			ungetc(g);
			word(g,"\"()<>@,;:/[]?=\r\n \t");
			return Word;
		}
	}
	return 0;
}

# return the rest of an rfc 822, not including \r or \n
# do not map to lower case
# The text is left in g.wordval and g.tok is set to '\n'.

lexhead(g: ref Private_info)
{
	c: int;
	line := "";
	while((c = getc(g)) != Bufio->EOF){
		if(c == '\r')
			c = wordcr(g);
		else if(c == '\n')
			c = wordnl(g);
		if(c == '\n')
			break;
		if(c == '\\'){
			c = getc(g);
			if(c == Bufio->EOF)
				break;
		}
		line[len line] = c;
	}
	g.tok = '\n';
	g.wordval = line;
}

# Collect characters into g.wordval, mapped to lower case, until one
# of the characters in stop (left unread) or the end of input.
# A backslash makes the following character ordinary.
word(g: ref Private_info,stop : string)
{
	c : int;
	w := "";
	while((c = getc(g)) != Bufio->EOF){
		if(c == '\r')
			c = wordcr(g);
		else if(c == '\n')
			c = wordnl(g);
		if(c == '\\'){
			c = getc(g);
			if(c == Bufio->EOF)
				break;
		}else if(str->in(c,stop)){
			ungetc(g);
			break;
		}
		if(c >= 'A' && c <= 'Z')
			c += 'a' - 'A';
		w[len w] = c;
	}
	g.wordval = w;
}


# Called after reading '\r': "\r\n" is treated like '\n' (see wordnl),
# a lone '\r' reads as a blank.
wordcr(g: ref Private_info): int
{
	c := getc(g);
	if(c == '\n')
		return wordnl(g);
	ungetc(g);
	return ' ';
}


# Called after reading '\n': a following blank or tab continues the
# line and is returned; anything else is put back and '\n' returned.
wordnl(g: ref Private_info): int
{
	c := getc(g);
	if(c == ' ' || c == '\t')
		return c;
	ungetc(g);
	return '\n';
}


# Read one character from the connection, reduced to 7 bits.
# On end of input g.parse_eof is set and Bufio->EOF returned.
getc(g: ref Private_info): int
{
	c := g.bufio->g.bin.getc();
	if(c == Bufio->EOF){
		g.parse_eof = 1;
		return c;
	}
	return c & 16r7f;
}

# Push the last character read back onto the input.
ungetc(g: ref Private_info)
{
	# this is a dirty hack, I am tacitly assuming that characters read
	# from stdin will be ASCII.....
	g.bufio->g.bin.ungetc();
}

# go from url with ascii and %xx escapes to unicode, allowing for existing unencoded utf-8
# '+' and NUL become a blank; a '%' that is not followed by two hex
# digits is copied unchanged.

urlunesc(s : string): string
{
	a := array[Sys->UTFmax*len s] of byte;
	o := 0;
	for(i := 0; i < len s; i++){
		c := int s[i];
		if(c < Runeself){
			if(c == '%' && i+2 < len s){
				d0 := hex(int s[i+1]);
				if(d0 >= 0){
					d1 := hex(int s[i+2]);
					if(d1 >= 0){
						i += 2;
						c = d0*16 + d1;
					}
				}
			} else if(c == '+' || c == 0)
				c = ' ';
			a[o++] = byte c;
		}else
			o += sys->char2byte(c, a, o);
	}
	return string a[0: o];
}

# Value of the hexadecimal digit c, or -1 if c is not one.
hex(c: int): int
{
	if(c >= '0' && c <= '9')
		return c-'0';
	if(c >= 'a' && c <= 'f')
		return c-'a' + 10;
	if(c >= 'A' && c <= 'F')
		return c-'A' + 10;
	return -1;
}

# Replace the characters that are special in HTML text and attribute
# values by their entities.
htmlesc(s: string): string
{
	t := "";
	for(i := 0; i < len s; i++){
		case s[i] {
		'&' =>
			t += "&amp;";
		'<' =>
			t += "&lt;";
		'>' =>
			t += "&gt;";
		'"' =>
			t += "&quot;";
		* =>
			t[len t] = s[i];
		}
	}
	return t;
}

# write a failure message to the net and exit
#	reason	index into errormsg
#	message	the object the error is about; shown on the page and
#		substituted for "%s" in the verbose text
# The page is not sent when there is no output stream or reason is 2.
# NOTE(review): which Httpd code has the value 2 is not visible here.
fail(g: ref Private_info,reason : int, message : string)
{
	verb : string;
	err := errormsg[reason];
	# message is presumably taken from the request, so it must not be
	# able to inject markup into the page
	safe := htmlesc(message);
	title:=sys->sprint("<head><title>%s</title></head>\n<body bgcolor=#ffffff>\n",
		err.concise);
	body1:=	"<h1> Error </h1>\n<P>" +
		"Sorry, Charon is unable to process your request. The webserver reports"+
		" the following error <P><b>";
	#concise error
	body2:="</b><p>for the URL\n<P><b>";
	#message
	body3:="</b><P>with the following reason:\n<P><b>";
	#reason
	# v2 starts at the first '%', if any; drop the two characters "%s"
	(v1,v2):=str->splitl(err.verbose,"%");
	if(len v2 >= 2)
		verb=v1+safe+v2[2:];
	else
		verb=err.verbose;
	body4:="</b><hr> This Webserver powered by <img src=\"/inferno.gif\">. <P>"+
		"For more information click <a href=\"http://inferno.lucent.com\"> here </a>\n"+
		"<hr><address>\n";
	dtime:=sys->sprint("This information processed at %s.\n",daytime->time());
	body5:="</address>\n</body>\n";
	strbuf:=title+body1+err.concise+body2+safe+body3+
		verb+body4+dtime+body5;
	if (g.bout!=nil && reason!=2){
		g.bufio->g.bout.puts(sys->sprint("%s %s\r\n", g.version, err.num));
		g.bufio->g.bout.puts(sys->sprint("Date: %s\r\n", daytime->time()));
		g.bufio->g.bout.puts(sys->sprint("Server: Charon\r\n"));
		g.bufio->g.bout.puts(sys->sprint("MIME-version: 1.0\r\n"));
		g.bufio->g.bout.puts(sys->sprint("Content-Type: text/html\r\n"));
		# the length is counted in bytes on the wire, not in characters
		g.bufio->g.bout.puts(sys->sprint("Content-Length: %d\r\n", len array of byte strbuf));
		g.bufio->g.bout.puts(sys->sprint("\r\n"));
		g.bufio->g.bout.puts(strbuf);
		g.bufio->g.bout.flush();
	}
	logit(g,sys->sprint("failing: %s", err.num));
	atexit(g);
}


# write successful header
# Only the status line and the two fixed header lines are written;
# the header block is left open.

okheaders(g: ref Private_info)
{
	g.bufio->g.bout.puts(sys->sprint("%s 200 OK\r\n", g.version));
	g.bufio->g.bout.puts("Server: Charon\r\n");
	g.bufio->g.bout.puts("MIME-version: 1.0\r\n");
}

# Write a complete "304 Not Modified" reply and exit via atexit().
notmodified(g: ref Private_info)
{
	g.bufio->g.bout.puts(sys->sprint("%s 304 Not Modified\r\n", g.version));
	g.bufio->g.bout.puts("Server: Charon\r\n");
	g.bufio->g.bout.puts("MIME-version: 1.0\r\n\r\n");
	atexit(g);
}

# Append "remotesys message" to the log file while holding the lock.
logit(g: ref Private_info,message : string )
{
	lock.obtain();
	sys->fprint(g.logfile,"%s %s\n", g.remotesys, message);
	lock.release();
}

# Encode p for use in a url: NUL is dropped, characters up to and
# including blank and '%' become %XX, characters from Runeself up
# become the %XX form of each of their UTF-8 bytes.
urlconv(p : string): string
{
	a := array[Sys->UTFmax] of byte;
	t := "";
	for(i := 0; i < len p; i++){
		c := p[i];
		if(c == 0)
			continue;	# ignore nul bytes
		if(c >= Runeself){	# convert to UTF-8
			n := sys->char2byte(c, a, 0);
			for(j := 0; j < n; j++)
				t += sys->sprint("%%%.2X", int a[j]);
		}else if(c <= ' ' || c == '%'){
			t += sys->sprint("%%%.2X", c);
		} else {
			t[len t] = c;
		}
	}
	return t;
}