1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include "httpd.h" 5 #include "httpsrv.h" 6 7 enum{ URLmax = 65536, HINTmax = 20 }; 8 #define RECIPLOG2 1.44269504089 9 10 char **urlname; /* array of url strings 1,...,nurl */ 11 static int nurl; 12 static uint urltab[URLmax]; /* hashstr(url) 1,...,nurl */ 13 static int urlnext[URLmax]; /* index urltab of next url in chain */ 14 static int urlhash[URLmax]; /* initially 0, meaning empty buckets */ 15 16 typedef struct Hint { 17 ushort url; 18 uchar prob; 19 } Hint; 20 Hint *hints[URLmax]; 21 uchar nhint[URLmax]; 22 23 vlong 24 Bfilelen(void *vb) 25 { 26 Biobuf *b; 27 vlong n; 28 29 b = vb; 30 n = Bseek(b, 0L, 2); 31 Bseek(b, 0L, 0); 32 return n; 33 } 34 35 static uint 36 hashstr(char* key) 37 { 38 /* asu works better than pjw for urls */ 39 uchar *k = (unsigned char*)key; 40 uint h = 0; 41 while(*k!=0) 42 h = 65599*h + *k++; 43 return h; 44 } 45 46 static int 47 urllookup(uint url) 48 { 49 /* returns +index into urltab, else -hash */ 50 int j, hash; 51 52 hash = 1 + url%(URLmax-1); 53 j = urlhash[hash]; 54 while(1){ 55 if(j==0) 56 return -hash; 57 if(url==urltab[j]) 58 return j; 59 j = urlnext[j]; 60 } 61 return 0; /* not reached */ 62 } 63 64 int 65 Bage(Biobuf *b) 66 { 67 Dir *dir; 68 long mtime; 69 70 dir = dirfstat(Bfildes(b)); 71 if(dir != nil) 72 mtime = dir->mtime; 73 else 74 mtime = 0; 75 free(dir); 76 return time(nil) - mtime; 77 } 78 79 void 80 urlinit(void) 81 { 82 static Biobuf *b = nil; 83 static vlong filelen = 0; 84 vlong newlen; 85 char *s, *arena; 86 int i, j, n; 87 uint url; 88 char *file; 89 90 if(filelen < 0) 91 return; 92 file = "/sys/log/httpd/url"; 93 if(b == nil){ 94 b = Bopen(file, OREAD); /* first time */ 95 if(b == nil){ 96 syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file); 97 filelen = -1; 98 return; 99 } 100 } 101 newlen = Bfilelen(b); /* side effect: rewinds b */ 102 if(newlen == filelen || Bage(b)<300) 103 return; 104 filelen = newlen; 105 if(filelen < 0) 106 return; 107 if(nurl){ /* free existing tables */ 108 free(urlname[0]); /* arena */ 109 memset(urlhash,0,sizeof urlhash); 110 memset(urlnext,0,sizeof urlnext); 111 nurl = 0; 112 } 113 if(urlname==nil) 114 urlname = (char**)ezalloc(URLmax*sizeof(*urlname)); 115 arena = (char*)ezalloc(filelen); /* enough for all the strcpy below */ 116 i = 1; 117 while((s=Brdline(b,'\n'))!=0){ 118 /* read lines of the form: 999 /url/path */ 119 n = Blinelen(b) - 1; 120 if(n>2 && s[n]=='\n'){ 121 s[n] = '\0'; 122 }else{ 123 sysfatal("missing fields or newline in url-db"); 124 } 125 j = strtoul(s,&s,10); 126 while(*s==' ') 127 s++; 128 if(i++!=j) 129 sysfatal("url-db synchronization error"); 130 url = hashstr(s); 131 j = urllookup(url); 132 if(j>=0) 133 sysfatal("duplicate url"); 134 j = -j; 135 nurl++; 136 if(nurl>=URLmax){ 137 syslog(0, HTTPLOG, "urlinit overflow at %s",s); 138 free(urlname[0]); /* arena */ 139 memset(urlhash,0,sizeof urlhash); 140 memset(urlnext,0,sizeof urlnext); 141 nurl = 0; 142 return; 143 } 144 urltab[nurl] = url; 145 urlnext[nurl] = urlhash[j]; 146 urlhash[j] = nurl; 147 strcpy(arena,s); 148 urlname[nurl] = arena; 149 arena += strlen(s)+1; 150 } 151 syslog(0, HTTPLOG, "prefetch-hints url=%d (%.1fMB)", nurl, 1.e-6*(URLmax*sizeof(*urlname)+filelen)); 152 /* b is held open, because namespace will be chopped */ 153 } 154 155 void 156 statsinit(void) 157 { 158 static Biobuf *b = nil; 159 static vlong filelen = 0; 160 vlong newlen; 161 int iq, n, i, nstats = 0; 162 uchar *s, buf[3+HINTmax*3]; /* iq, n, (url,prob)... */ 163 Hint *arena, *h; 164 char *file; 165 static void *oldarena = nil; 166 167 file = "/sys/log/httpd/pathstat"; 168 if(b == nil){ 169 if(filelen == -1) 170 return; /* if failed first time */ 171 b = Bopen(file, OREAD); /* first time */ 172 if(b == nil){ 173 syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file); 174 filelen = -1; 175 return; 176 } 177 } 178 newlen = Bfilelen(b); /* side effect: rewinds b */ 179 if(newlen == filelen || Bage(b)<300) 180 return; 181 filelen = newlen; 182 if(oldarena){ 183 free(oldarena); 184 memset(nhint,0,sizeof nhint); 185 } 186 arena = (Hint*)ezalloc((filelen/3)*sizeof(Hint)); 187 oldarena = arena; 188 while(1){ 189 i = Bread(b,buf,3); 190 if(i<3) 191 break; 192 nstats++; 193 iq = buf[0]; 194 iq = (iq<<8) | buf[1]; 195 n = buf[2]; 196 h = arena; 197 arena += n; 198 hints[iq] = h; 199 nhint[iq] = n; 200 if(Bread(b,buf,3*n)!=3*n) 201 sysfatal("stats read error"); 202 for(i=0; i<n; i++){ 203 s = &buf[3*i]; 204 h[i].url = (s[0]<<8) | s[1]; 205 h[i].prob = s[2]; 206 } 207 } 208 syslog(0, HTTPLOG, "prefetch-hints stats=%d (%.1fMB)", nstats, 1.e-6*((filelen/3)*sizeof(Hint))); 209 } 210 211 void 212 urlcanon(char *url) 213 { 214 /* all the changes here can be implemented by rewriting in-place */ 215 char *p, *q; 216 217 /* remove extraneous '/' in the middle and at the end */ 218 p = url+1; /* first char needs no change */ 219 q = p; 220 while(q[0]){ 221 if(q[0]=='/' && q[-1]=='/'){ 222 q++; 223 continue; 224 } 225 *p++ = *q++; 226 } 227 if(q[-1]=='/'){ /* trailing '/' */ 228 p[-1] = '\0'; 229 }else{ 230 p[0] = '\0'; 231 } 232 233 /* specific to the cm.bell-labs.com web site */ 234 if(strncmp(url,"/cm/",4)==0){ 235 if(strchr("cims",url[4]) && strncmp(url+5,"s/who/",6)==0) 236 /* strip off /cm/cs */ 237 memmove(url,url+6,strlen(url+6)+1); 238 else if(strncmp(url+4,"ms/what/wavelet",15)==0) 239 /* /cm/ms/what */ 240 memmove(url,url+11,strlen(url+11)+1); 241 } 242 } 243 244 void 245 hintprint(HConnect *hc, Hio *hout, char *uri, int thresh, int havej) 246 { 247 int i, j, pr, prefix, fd, siz, havei, newhint = 0, n; 248 char *query, *sf, etag[32], *wurl; 249 Dir *dir; 250 Hint *h, *haveh; 251 252 query = hstrdup(hc, uri); 253 urlcanon(query); 254 j = urllookup(hashstr(query)); 255 if(j < 0) 256 return; 257 query = strrchr(uri,'/'); 258 if(!query) 259 return; /* can't happen */ 260 prefix = query-uri+1; /* = strlen(dirname)+1 */ 261 h = hints[j]; 262 for(i=0; i<nhint[j]; i++){ 263 if(havej > 0 && havej < URLmax){ /* exclude hints client has */ 264 haveh = hints[havej]; 265 for(havei=0; havei<nhint[havej]; havei++) 266 if( haveh[havei].url == h[i].url) 267 goto continuei; 268 } 269 sf = urlname[h[i].url]; 270 pr = h[i].prob; 271 if(pr<thresh) 272 break; 273 n = strlen(webroot) + strlen(sf) + 1; 274 wurl = halloc(hc, n); 275 strcpy(wurl, webroot); 276 strcat(wurl, sf); 277 fd = open(wurl, OREAD); 278 if(fd<0) 279 continue; 280 dir = dirfstat(fd); 281 if(dir == nil){ 282 close(fd); 283 continue; 284 } 285 close(fd); 286 snprint(etag, sizeof(etag), "\"%lluxv%lux\"", dir->qid.path, dir->qid.vers); 287 siz = (int)( log((double)dir->length) * RECIPLOG2 + 0.9999); 288 free(dir); 289 if(strncmp(uri,sf,prefix)==0 && strchr(sf+prefix,'/')==0 && sf[prefix]!=0) 290 sf = sf+prefix; 291 hprint(hout, "Fresh: %d,%s,%d,%s\r\n", pr, etag, siz, sf); 292 newhint++; 293 continuei: ; 294 } 295 if(newhint) 296 hprint(hout, "Fresh: have/%d\r\n", j); 297 } 298 299