xref: /plan9/sys/src/cmd/ip/httpd/hints.c (revision b85a83648eec38fe82b6f00adfd7828ceec5ee8d)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "httpd.h"
5 #include "httpsrv.h"
6 
7 enum{ URLmax = 65536, HINTmax = 20 };
8 #define RECIPLOG2 1.44269504089
9 
10 char **urlname;				/* array of url strings    1,...,nurl */
11 static int nurl;
12 static uint urltab[URLmax];		/* hashstr(url)  1,...,nurl */
13 static int urlnext[URLmax];		/* index urltab of next url in chain */
14 static int urlhash[URLmax];		/* initially 0, meaning empty buckets */
15 
16 typedef struct Hint {
17 	ushort url;
18 	uchar prob;
19 } Hint;
20 Hint *hints[URLmax];
21 uchar nhint[URLmax];
22 
23 vlong
Bfilelen(void * vb)24 Bfilelen(void *vb)
25 {
26 	Biobuf *b;
27 	vlong n;
28 
29 	b = vb;
30 	n = Bseek(b, 0L, 2);
31 	Bseek(b, 0L, 0);
32 	return n;
33 }
34 
35 static uint
hashstr(char * key)36 hashstr(char* key)
37 {
38 	/* asu works better than pjw for urls */
39 	uchar *k = (unsigned char*)key;
40 	uint h = 0;
41 	while(*k!=0)
42 		h = 65599*h + *k++;
43         return h;
44 }
45 
46 static int
urllookup(uint url)47 urllookup(uint url)
48 {
49 	/* returns +index into urltab, else -hash */
50 	int j, hash;
51 
52 	hash = 1 + url%(URLmax-1);
53 	j = urlhash[hash];
54 	for(;;){
55 		if(j==0)
56 			return -hash;
57 		if(url==urltab[j])
58 			return j;
59 		j = urlnext[j];
60 	}
61 }
62 
63 int
Bage(Biobuf * b)64 Bage(Biobuf *b)
65 {
66 	Dir *dir;
67 	long mtime;
68 
69 	dir = dirfstat(Bfildes(b));
70 	if(dir != nil)
71 		mtime = dir->mtime;
72 	else
73 		mtime = 0;
74 	free(dir);
75 	return time(nil) - mtime;
76 }
77 
78 void
urlinit(void)79 urlinit(void)
80 {
81 	static Biobuf *b = nil;
82 	static vlong filelen = 0;
83 	vlong newlen;
84 	char *s, *arena;
85 	int i, j, n;
86 	uint url;
87 	char *file;
88 
89 	if(filelen < 0)
90 		return;
91 	file = "/sys/log/httpd/url";
92 	if(b == nil){
93 		b = Bopen(file, OREAD); /* first time */
94 		if(b == nil){
95 			syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
96 			filelen = -1;
97 			return;
98 		}
99 	}
100 	newlen = Bfilelen(b); /* side effect: rewinds b */
101 	if(newlen == filelen || Bage(b)<300)
102 		return;
103 	filelen = newlen;
104 	if(filelen < 0)
105 		return;
106 	if(nurl){ /* free existing tables */
107 		free(urlname[0]); /* arena */
108 		memset(urlhash,0,sizeof urlhash);
109 		memset(urlnext,0,sizeof urlnext);
110 		nurl = 0;
111 	}
112 	if(urlname==nil)
113 		urlname = (char**)ezalloc(URLmax*sizeof(*urlname));
114 	arena = (char*)ezalloc(filelen);  /* enough for all the strcpy below */
115 	i = 1;
116 	while((s=Brdline(b,'\n'))!=0){
117 		/* read lines of the form:  999 /url/path */
118 		n = Blinelen(b) - 1;
119 		if(n>2 && s[n]=='\n'){
120 			s[n] = '\0';
121 		}else{
122 			sysfatal("missing fields or newline in url-db");
123 		}
124 		j = strtoul(s,&s,10);
125 		while(*s==' ')
126 			s++;
127 		if(i++!=j)
128 			sysfatal("url-db synchronization error");
129 		url = hashstr(s);
130 		j = urllookup(url);
131 		if(j>=0)
132 			sysfatal("duplicate url");
133 		j = -j;
134 		nurl++;
135 		if(nurl>=URLmax){
136 			syslog(0, HTTPLOG, "urlinit overflow at %s",s);
137 			free(urlname[0]); /* arena */
138 			memset(urlhash,0,sizeof urlhash);
139 			memset(urlnext,0,sizeof urlnext);
140 			nurl = 0;
141 			return;
142 		}
143 		urltab[nurl] = url;
144 		urlnext[nurl] = urlhash[j];
145 		urlhash[j] = nurl;
146 		strcpy(arena,s);
147 		urlname[nurl] = arena;
148 		arena += strlen(s)+1;
149 	}
150 	syslog(0, HTTPLOG, "prefetch-hints url=%d (%.1fMB)", nurl, 1.e-6*(URLmax*sizeof(*urlname)+filelen));
151 	/* b is held open, because namespace will be chopped */
152 }
153 
154 void
statsinit(void)155 statsinit(void)
156 {
157 	static Biobuf *b = nil;
158 	static vlong filelen = 0;
159 	vlong newlen;
160 	int iq, n, i, nstats = 0;
161 	uchar *s, buf[3+HINTmax*3];  /* iq, n, (url,prob)... */
162 	Hint *arena, *h;
163 	char *file;
164 	static void *oldarena = nil;
165 
166 	file = "/sys/log/httpd/pathstat";
167 	if(b == nil){
168 		if(filelen == -1)
169 			return; /* if failed first time */
170 		b = Bopen(file, OREAD); /* first time */
171 		if(b == nil){
172 			syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
173 			filelen = -1;
174 			return;
175 		}
176 	}
177 	newlen = Bfilelen(b); /* side effect: rewinds b */
178 	if(newlen == filelen || Bage(b)<300)
179 		return;
180 	filelen = newlen;
181 	if(oldarena){
182 		free(oldarena);
183 		memset(nhint,0,sizeof nhint);
184 	}
185 	arena = (Hint*)ezalloc((filelen/3)*sizeof(Hint));
186 	oldarena = arena;
187 	for(;;){
188 		i = Bread(b,buf,3);
189 		if(i<3)
190 			break;
191 		nstats++;
192 		iq = buf[0];
193 		iq = (iq<<8) | buf[1];
194 		n = buf[2];
195 		h = arena;
196 		arena += n;
197 		hints[iq] = h;
198 		nhint[iq] = n;
199 		if(Bread(b,buf,3*n)!=3*n)
200 			sysfatal("stats read error");
201 		for(i=0; i<n; i++){
202 			s = &buf[3*i];
203 			h[i].url = (s[0]<<8) | s[1];
204 			h[i].prob = s[2];
205 		}
206 	}
207 	syslog(0, HTTPLOG, "prefetch-hints stats=%d (%.1fMB)", nstats, 1.e-6*((filelen/3)*sizeof(Hint)));
208 }
209 
210 void
urlcanon(char * url)211 urlcanon(char *url)
212 {
213 	/* all the changes here can be implemented by rewriting in-place */
214 	char *p, *q;
215 
216 	/* remove extraneous '/' in the middle and at the end */
217 	p = url+1;  /* first char needs no change */
218 	q = p;
219 	while(q[0]){
220 		if(q[0]=='/' && q[-1]=='/'){
221 			q++;
222 			continue;
223 		}
224 		*p++ = *q++;
225 	}
226 	if(q[-1]=='/'){  /* trailing '/' */
227 		p[-1] = '\0';
228 	}else{
229 		p[0] = '\0';
230 	}
231 
232 	/* specific to the cm.bell-labs.com web site */
233 	if(strncmp(url,"/cm/",4)==0){
234 		if(strchr("cims",url[4]) && strncmp(url+5,"s/who/",6)==0)
235 			/* strip off /cm/cs */
236 			memmove(url,url+6,strlen(url+6)+1);
237 		else if(strncmp(url+4,"ms/what/wavelet",15)==0)
238 			/* /cm/ms/what */
239 			memmove(url,url+11,strlen(url+11)+1);
240 	}
241 }
242 
243 void
hintprint(HConnect * hc,Hio * hout,char * uri,int thresh,int havej)244 hintprint(HConnect *hc, Hio *hout, char *uri, int thresh, int havej)
245 {
246 	int i, j, pr, prefix, fd, siz, havei, newhint = 0, n;
247 	char *query, *sf, etag[32], *wurl;
248 	Dir *dir;
249 	Hint *h, *haveh;
250 
251 	query = hstrdup(hc, uri);
252 	urlcanon(query);
253 	j = urllookup(hashstr(query));
254 	if(j < 0)
255 		return;
256 	query = strrchr(uri,'/');
257 	if(!query)
258 		return;  /* can't happen */
259 	prefix = query-uri+1;  /* = strlen(dirname)+1 */
260 	h = hints[j];
261 	for(i=0; i<nhint[j]; i++){
262 		if(havej > 0 && havej < URLmax){ /* exclude hints client has */
263 			haveh = hints[havej];
264 			for(havei=0; havei<nhint[havej]; havei++)
265 				if( haveh[havei].url == h[i].url)
266 					goto continuei;
267 		}
268 		sf = urlname[h[i].url];
269 		pr = h[i].prob;
270 		if(pr<thresh)
271 			break;
272 		n = strlen(webroot) + strlen(sf) + 1;
273 		wurl = halloc(hc, n);
274 		strcpy(wurl, webroot);
275 		strcat(wurl, sf);
276 		fd = open(wurl, OREAD);
277 		if(fd<0)
278 			continue;
279 		dir = dirfstat(fd);
280 		if(dir == nil){
281 			close(fd);
282 			continue;
283 		}
284 		close(fd);
285 		snprint(etag, sizeof(etag), "\"%lluxv%lux\"", dir->qid.path, dir->qid.vers);
286 		siz = (int)( log((double)dir->length) * RECIPLOG2 + 0.9999);
287 		free(dir);
288 		if(strncmp(uri,sf,prefix)==0 && strchr(sf+prefix,'/')==0 && sf[prefix]!=0)
289 			sf = sf+prefix;
290 		hprint(hout, "Fresh: %d,%s,%d,%s\r\n", pr, etag, siz, sf);
291 		newhint++;
292 continuei: ;
293 	}
294 	if(newhint)
295 		hprint(hout, "Fresh: have/%d\r\n", j);
296 }
297 
298