1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "httpd.h"
5 #include "httpsrv.h"
6
7 enum{ URLmax = 65536, HINTmax = 20 };
8 #define RECIPLOG2 1.44269504089
9
10 char **urlname; /* array of url strings 1,...,nurl */
11 static int nurl;
12 static uint urltab[URLmax]; /* hashstr(url) 1,...,nurl */
13 static int urlnext[URLmax]; /* index urltab of next url in chain */
14 static int urlhash[URLmax]; /* initially 0, meaning empty buckets */
15
16 typedef struct Hint {
17 ushort url;
18 uchar prob;
19 } Hint;
20 Hint *hints[URLmax];
21 uchar nhint[URLmax];
22
23 vlong
Bfilelen(void * vb)24 Bfilelen(void *vb)
25 {
26 Biobuf *b;
27 vlong n;
28
29 b = vb;
30 n = Bseek(b, 0L, 2);
31 Bseek(b, 0L, 0);
32 return n;
33 }
34
35 static uint
hashstr(char * key)36 hashstr(char* key)
37 {
38 /* asu works better than pjw for urls */
39 uchar *k = (unsigned char*)key;
40 uint h = 0;
41 while(*k!=0)
42 h = 65599*h + *k++;
43 return h;
44 }
45
46 static int
urllookup(uint url)47 urllookup(uint url)
48 {
49 /* returns +index into urltab, else -hash */
50 int j, hash;
51
52 hash = 1 + url%(URLmax-1);
53 j = urlhash[hash];
54 for(;;){
55 if(j==0)
56 return -hash;
57 if(url==urltab[j])
58 return j;
59 j = urlnext[j];
60 }
61 }
62
63 int
Bage(Biobuf * b)64 Bage(Biobuf *b)
65 {
66 Dir *dir;
67 long mtime;
68
69 dir = dirfstat(Bfildes(b));
70 if(dir != nil)
71 mtime = dir->mtime;
72 else
73 mtime = 0;
74 free(dir);
75 return time(nil) - mtime;
76 }
77
78 void
urlinit(void)79 urlinit(void)
80 {
81 static Biobuf *b = nil;
82 static vlong filelen = 0;
83 vlong newlen;
84 char *s, *arena;
85 int i, j, n;
86 uint url;
87 char *file;
88
89 if(filelen < 0)
90 return;
91 file = "/sys/log/httpd/url";
92 if(b == nil){
93 b = Bopen(file, OREAD); /* first time */
94 if(b == nil){
95 syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
96 filelen = -1;
97 return;
98 }
99 }
100 newlen = Bfilelen(b); /* side effect: rewinds b */
101 if(newlen == filelen || Bage(b)<300)
102 return;
103 filelen = newlen;
104 if(filelen < 0)
105 return;
106 if(nurl){ /* free existing tables */
107 free(urlname[0]); /* arena */
108 memset(urlhash,0,sizeof urlhash);
109 memset(urlnext,0,sizeof urlnext);
110 nurl = 0;
111 }
112 if(urlname==nil)
113 urlname = (char**)ezalloc(URLmax*sizeof(*urlname));
114 arena = (char*)ezalloc(filelen); /* enough for all the strcpy below */
115 i = 1;
116 while((s=Brdline(b,'\n'))!=0){
117 /* read lines of the form: 999 /url/path */
118 n = Blinelen(b) - 1;
119 if(n>2 && s[n]=='\n'){
120 s[n] = '\0';
121 }else{
122 sysfatal("missing fields or newline in url-db");
123 }
124 j = strtoul(s,&s,10);
125 while(*s==' ')
126 s++;
127 if(i++!=j)
128 sysfatal("url-db synchronization error");
129 url = hashstr(s);
130 j = urllookup(url);
131 if(j>=0)
132 sysfatal("duplicate url");
133 j = -j;
134 nurl++;
135 if(nurl>=URLmax){
136 syslog(0, HTTPLOG, "urlinit overflow at %s",s);
137 free(urlname[0]); /* arena */
138 memset(urlhash,0,sizeof urlhash);
139 memset(urlnext,0,sizeof urlnext);
140 nurl = 0;
141 return;
142 }
143 urltab[nurl] = url;
144 urlnext[nurl] = urlhash[j];
145 urlhash[j] = nurl;
146 strcpy(arena,s);
147 urlname[nurl] = arena;
148 arena += strlen(s)+1;
149 }
150 syslog(0, HTTPLOG, "prefetch-hints url=%d (%.1fMB)", nurl, 1.e-6*(URLmax*sizeof(*urlname)+filelen));
151 /* b is held open, because namespace will be chopped */
152 }
153
154 void
statsinit(void)155 statsinit(void)
156 {
157 static Biobuf *b = nil;
158 static vlong filelen = 0;
159 vlong newlen;
160 int iq, n, i, nstats = 0;
161 uchar *s, buf[3+HINTmax*3]; /* iq, n, (url,prob)... */
162 Hint *arena, *h;
163 char *file;
164 static void *oldarena = nil;
165
166 file = "/sys/log/httpd/pathstat";
167 if(b == nil){
168 if(filelen == -1)
169 return; /* if failed first time */
170 b = Bopen(file, OREAD); /* first time */
171 if(b == nil){
172 syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
173 filelen = -1;
174 return;
175 }
176 }
177 newlen = Bfilelen(b); /* side effect: rewinds b */
178 if(newlen == filelen || Bage(b)<300)
179 return;
180 filelen = newlen;
181 if(oldarena){
182 free(oldarena);
183 memset(nhint,0,sizeof nhint);
184 }
185 arena = (Hint*)ezalloc((filelen/3)*sizeof(Hint));
186 oldarena = arena;
187 for(;;){
188 i = Bread(b,buf,3);
189 if(i<3)
190 break;
191 nstats++;
192 iq = buf[0];
193 iq = (iq<<8) | buf[1];
194 n = buf[2];
195 h = arena;
196 arena += n;
197 hints[iq] = h;
198 nhint[iq] = n;
199 if(Bread(b,buf,3*n)!=3*n)
200 sysfatal("stats read error");
201 for(i=0; i<n; i++){
202 s = &buf[3*i];
203 h[i].url = (s[0]<<8) | s[1];
204 h[i].prob = s[2];
205 }
206 }
207 syslog(0, HTTPLOG, "prefetch-hints stats=%d (%.1fMB)", nstats, 1.e-6*((filelen/3)*sizeof(Hint)));
208 }
209
210 void
urlcanon(char * url)211 urlcanon(char *url)
212 {
213 /* all the changes here can be implemented by rewriting in-place */
214 char *p, *q;
215
216 /* remove extraneous '/' in the middle and at the end */
217 p = url+1; /* first char needs no change */
218 q = p;
219 while(q[0]){
220 if(q[0]=='/' && q[-1]=='/'){
221 q++;
222 continue;
223 }
224 *p++ = *q++;
225 }
226 if(q[-1]=='/'){ /* trailing '/' */
227 p[-1] = '\0';
228 }else{
229 p[0] = '\0';
230 }
231
232 /* specific to the cm.bell-labs.com web site */
233 if(strncmp(url,"/cm/",4)==0){
234 if(strchr("cims",url[4]) && strncmp(url+5,"s/who/",6)==0)
235 /* strip off /cm/cs */
236 memmove(url,url+6,strlen(url+6)+1);
237 else if(strncmp(url+4,"ms/what/wavelet",15)==0)
238 /* /cm/ms/what */
239 memmove(url,url+11,strlen(url+11)+1);
240 }
241 }
242
243 void
hintprint(HConnect * hc,Hio * hout,char * uri,int thresh,int havej)244 hintprint(HConnect *hc, Hio *hout, char *uri, int thresh, int havej)
245 {
246 int i, j, pr, prefix, fd, siz, havei, newhint = 0, n;
247 char *query, *sf, etag[32], *wurl;
248 Dir *dir;
249 Hint *h, *haveh;
250
251 query = hstrdup(hc, uri);
252 urlcanon(query);
253 j = urllookup(hashstr(query));
254 if(j < 0)
255 return;
256 query = strrchr(uri,'/');
257 if(!query)
258 return; /* can't happen */
259 prefix = query-uri+1; /* = strlen(dirname)+1 */
260 h = hints[j];
261 for(i=0; i<nhint[j]; i++){
262 if(havej > 0 && havej < URLmax){ /* exclude hints client has */
263 haveh = hints[havej];
264 for(havei=0; havei<nhint[havej]; havei++)
265 if( haveh[havei].url == h[i].url)
266 goto continuei;
267 }
268 sf = urlname[h[i].url];
269 pr = h[i].prob;
270 if(pr<thresh)
271 break;
272 n = strlen(webroot) + strlen(sf) + 1;
273 wurl = halloc(hc, n);
274 strcpy(wurl, webroot);
275 strcat(wurl, sf);
276 fd = open(wurl, OREAD);
277 if(fd<0)
278 continue;
279 dir = dirfstat(fd);
280 if(dir == nil){
281 close(fd);
282 continue;
283 }
284 close(fd);
285 snprint(etag, sizeof(etag), "\"%lluxv%lux\"", dir->qid.path, dir->qid.vers);
286 siz = (int)( log((double)dir->length) * RECIPLOG2 + 0.9999);
287 free(dir);
288 if(strncmp(uri,sf,prefix)==0 && strchr(sf+prefix,'/')==0 && sf[prefix]!=0)
289 sf = sf+prefix;
290 hprint(hout, "Fresh: %d,%s,%d,%s\r\n", pr, etag, siz, sf);
291 newhint++;
292 continuei: ;
293 }
294 if(newhint)
295 hprint(hout, "Fresh: have/%d\r\n", j);
296 }
297
298