1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include "httpd.h" 5 #include "httpsrv.h" 6 7 enum{ URLmax = 65536, HINTmax = 20 }; 8 #define RECIPLOG2 1.44269504089 9 10 char **urlname; /* array of url strings 1,...,nurl */ 11 static int nurl; 12 static uint urltab[URLmax]; /* hashstr(url) 1,...,nurl */ 13 static int urlnext[URLmax]; /* index urltab of next url in chain */ 14 static int urlhash[URLmax]; /* initially 0, meaning empty buckets */ 15 16 typedef struct Hint { 17 ushort url; 18 uchar prob; 19 } Hint; 20 Hint *hints[URLmax]; 21 uchar nhint[URLmax]; 22 23 vlong 24 Bfilelen(void *vb) 25 { 26 Biobuf *b; 27 vlong n; 28 29 b = vb; 30 n = Bseek(b, 0L, 2); 31 Bseek(b, 0L, 0); 32 return n; 33 } 34 35 static uint 36 hashstr(char* key) 37 { 38 /* asu works better than pjw for urls */ 39 uchar *k = (unsigned char*)key; 40 uint h = 0; 41 while(*k!=0) 42 h = 65599*h + *k++; 43 return h; 44 } 45 46 static int 47 urllookup(uint url) 48 { 49 /* returns +index into urltab, else -hash */ 50 int j, hash; 51 52 hash = 1 + url%(URLmax-1); 53 j = urlhash[hash]; 54 for(;;){ 55 if(j==0) 56 return -hash; 57 if(url==urltab[j]) 58 return j; 59 j = urlnext[j]; 60 } 61 } 62 63 int 64 Bage(Biobuf *b) 65 { 66 Dir *dir; 67 long mtime; 68 69 dir = dirfstat(Bfildes(b)); 70 if(dir != nil) 71 mtime = dir->mtime; 72 else 73 mtime = 0; 74 free(dir); 75 return time(nil) - mtime; 76 } 77 78 void 79 urlinit(void) 80 { 81 static Biobuf *b = nil; 82 static vlong filelen = 0; 83 vlong newlen; 84 char *s, *arena; 85 int i, j, n; 86 uint url; 87 char *file; 88 89 if(filelen < 0) 90 return; 91 file = "/sys/log/httpd/url"; 92 if(b == nil){ 93 b = Bopen(file, OREAD); /* first time */ 94 if(b == nil){ 95 syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file); 96 filelen = -1; 97 return; 98 } 99 } 100 newlen = Bfilelen(b); /* side effect: rewinds b */ 101 if(newlen == filelen || Bage(b)<300) 102 return; 103 filelen = newlen; 104 if(filelen < 0) 105 return; 106 if(nurl){ /* free existing tables */ 107 free(urlname[0]); /* arena */ 108 memset(urlhash,0,sizeof urlhash); 109 memset(urlnext,0,sizeof urlnext); 110 nurl = 0; 111 } 112 if(urlname==nil) 113 urlname = (char**)ezalloc(URLmax*sizeof(*urlname)); 114 arena = (char*)ezalloc(filelen); /* enough for all the strcpy below */ 115 i = 1; 116 while((s=Brdline(b,'\n'))!=0){ 117 /* read lines of the form: 999 /url/path */ 118 n = Blinelen(b) - 1; 119 if(n>2 && s[n]=='\n'){ 120 s[n] = '\0'; 121 }else{ 122 sysfatal("missing fields or newline in url-db"); 123 } 124 j = strtoul(s,&s,10); 125 while(*s==' ') 126 s++; 127 if(i++!=j) 128 sysfatal("url-db synchronization error"); 129 url = hashstr(s); 130 j = urllookup(url); 131 if(j>=0) 132 sysfatal("duplicate url"); 133 j = -j; 134 nurl++; 135 if(nurl>=URLmax){ 136 syslog(0, HTTPLOG, "urlinit overflow at %s",s); 137 free(urlname[0]); /* arena */ 138 memset(urlhash,0,sizeof urlhash); 139 memset(urlnext,0,sizeof urlnext); 140 nurl = 0; 141 return; 142 } 143 urltab[nurl] = url; 144 urlnext[nurl] = urlhash[j]; 145 urlhash[j] = nurl; 146 strcpy(arena,s); 147 urlname[nurl] = arena; 148 arena += strlen(s)+1; 149 } 150 syslog(0, HTTPLOG, "prefetch-hints url=%d (%.1fMB)", nurl, 1.e-6*(URLmax*sizeof(*urlname)+filelen)); 151 /* b is held open, because namespace will be chopped */ 152 } 153 154 void 155 statsinit(void) 156 { 157 static Biobuf *b = nil; 158 static vlong filelen = 0; 159 vlong newlen; 160 int iq, n, i, nstats = 0; 161 uchar *s, buf[3+HINTmax*3]; /* iq, n, (url,prob)... */ 162 Hint *arena, *h; 163 char *file; 164 static void *oldarena = nil; 165 166 file = "/sys/log/httpd/pathstat"; 167 if(b == nil){ 168 if(filelen == -1) 169 return; /* if failed first time */ 170 b = Bopen(file, OREAD); /* first time */ 171 if(b == nil){ 172 syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file); 173 filelen = -1; 174 return; 175 } 176 } 177 newlen = Bfilelen(b); /* side effect: rewinds b */ 178 if(newlen == filelen || Bage(b)<300) 179 return; 180 filelen = newlen; 181 if(oldarena){ 182 free(oldarena); 183 memset(nhint,0,sizeof nhint); 184 } 185 arena = (Hint*)ezalloc((filelen/3)*sizeof(Hint)); 186 oldarena = arena; 187 for(;;){ 188 i = Bread(b,buf,3); 189 if(i<3) 190 break; 191 nstats++; 192 iq = buf[0]; 193 iq = (iq<<8) | buf[1]; 194 n = buf[2]; 195 h = arena; 196 arena += n; 197 hints[iq] = h; 198 nhint[iq] = n; 199 if(Bread(b,buf,3*n)!=3*n) 200 sysfatal("stats read error"); 201 for(i=0; i<n; i++){ 202 s = &buf[3*i]; 203 h[i].url = (s[0]<<8) | s[1]; 204 h[i].prob = s[2]; 205 } 206 } 207 syslog(0, HTTPLOG, "prefetch-hints stats=%d (%.1fMB)", nstats, 1.e-6*((filelen/3)*sizeof(Hint))); 208 } 209 210 void 211 urlcanon(char *url) 212 { 213 /* all the changes here can be implemented by rewriting in-place */ 214 char *p, *q; 215 216 /* remove extraneous '/' in the middle and at the end */ 217 p = url+1; /* first char needs no change */ 218 q = p; 219 while(q[0]){ 220 if(q[0]=='/' && q[-1]=='/'){ 221 q++; 222 continue; 223 } 224 *p++ = *q++; 225 } 226 if(q[-1]=='/'){ /* trailing '/' */ 227 p[-1] = '\0'; 228 }else{ 229 p[0] = '\0'; 230 } 231 232 /* specific to the cm.bell-labs.com web site */ 233 if(strncmp(url,"/cm/",4)==0){ 234 if(strchr("cims",url[4]) && strncmp(url+5,"s/who/",6)==0) 235 /* strip off /cm/cs */ 236 memmove(url,url+6,strlen(url+6)+1); 237 else if(strncmp(url+4,"ms/what/wavelet",15)==0) 238 /* /cm/ms/what */ 239 memmove(url,url+11,strlen(url+11)+1); 240 } 241 } 242 243 void 244 hintprint(HConnect *hc, Hio *hout, char *uri, int thresh, int havej) 245 { 246 int i, j, pr, prefix, fd, siz, havei, newhint = 0, n; 247 char *query, *sf, etag[32], *wurl; 248 Dir *dir; 249 Hint *h, *haveh; 250 251 query = hstrdup(hc, uri); 252 urlcanon(query); 253 j = urllookup(hashstr(query)); 254 if(j < 0) 255 return; 256 query = strrchr(uri,'/'); 257 if(!query) 258 return; /* can't happen */ 259 prefix = query-uri+1; /* = strlen(dirname)+1 */ 260 h = hints[j]; 261 for(i=0; i<nhint[j]; i++){ 262 if(havej > 0 && havej < URLmax){ /* exclude hints client has */ 263 haveh = hints[havej]; 264 for(havei=0; havei<nhint[havej]; havei++) 265 if( haveh[havei].url == h[i].url) 266 goto continuei; 267 } 268 sf = urlname[h[i].url]; 269 pr = h[i].prob; 270 if(pr<thresh) 271 break; 272 n = strlen(webroot) + strlen(sf) + 1; 273 wurl = halloc(hc, n); 274 strcpy(wurl, webroot); 275 strcat(wurl, sf); 276 fd = open(wurl, OREAD); 277 if(fd<0) 278 continue; 279 dir = dirfstat(fd); 280 if(dir == nil){ 281 close(fd); 282 continue; 283 } 284 close(fd); 285 snprint(etag, sizeof(etag), "\"%lluxv%lux\"", dir->qid.path, dir->qid.vers); 286 siz = (int)( log((double)dir->length) * RECIPLOG2 + 0.9999); 287 free(dir); 288 if(strncmp(uri,sf,prefix)==0 && strchr(sf+prefix,'/')==0 && sf[prefix]!=0) 289 sf = sf+prefix; 290 hprint(hout, "Fresh: %d,%s,%d,%s\r\n", pr, etag, siz, sf); 291 newhint++; 292 continuei: ; 293 } 294 if(newhint) 295 hprint(hout, "Fresh: have/%d\r\n", j); 296 } 297 298