xref: /plan9-contrib/sys/src/cmd/ip/httpd/hints.c (revision d46c239f8612929b7dbade67d0d071633df3a15d)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "httpd.h"
5 #include "httpsrv.h"
6 
7 enum{ URLmax = 65536, HINTmax = 20 };
8 #define RECIPLOG2 1.44269504089
9 
10 char **urlname;				/* array of url strings    1,...,nurl */
11 static int nurl;
12 static uint urltab[URLmax];		/* hashstr(url)  1,...,nurl */
13 static int urlnext[URLmax];		/* index urltab of next url in chain */
14 static int urlhash[URLmax];		/* initially 0, meaning empty buckets */
15 
16 typedef struct Hint {
17 	ushort url;
18 	uchar prob;
19 } Hint;
20 Hint *hints[URLmax];
21 uchar nhint[URLmax];
22 
23 vlong
24 Bfilelen(void *vb)
25 {
26 	Biobuf *b;
27 	vlong n;
28 
29 	b = vb;
30 	n = Bseek(b, 0L, 2);
31 	Bseek(b, 0L, 0);
32 	return n;
33 }
34 
35 static uint
36 hashstr(char* key)
37 {
38 	/* asu works better than pjw for urls */
39 	uchar *k = (unsigned char*)key;
40 	uint h = 0;
41 	while(*k!=0)
42 		h = 65599*h + *k++;
43         return h;
44 }
45 
46 static int
47 urllookup(uint url)
48 {
49 	/* returns +index into urltab, else -hash */
50 	int j, hash;
51 
52 	hash = 1 + url%(URLmax-1);
53 	j = urlhash[hash];
54 	while(1){
55 		if(j==0)
56 			return -hash;
57 		if(url==urltab[j])
58 			return j;
59 		j = urlnext[j];
60 	}
61 	return 0;  /* not reached */
62 }
63 
64 int
65 Bage(Biobuf *b)
66 {
67 	Dir *dir;
68 	long mtime;
69 
70 	dir = dirfstat(Bfildes(b));
71 	if(dir != nil)
72 		mtime = dir->mtime;
73 	else
74 		mtime = 0;
75 	free(dir);
76 	return time(nil) - mtime;
77 }
78 
79 void
80 urlinit(void)
81 {
82 	static Biobuf *b = nil;
83 	static vlong filelen = 0;
84 	vlong newlen;
85 	char *s, *arena;
86 	int i, j, n;
87 	uint url;
88 	char *file;
89 
90 	if(filelen < 0)
91 		return;
92 	file = "/sys/log/httpd/url";
93 	if(b == nil){
94 		b = Bopen(file, OREAD); /* first time */
95 		if(b == nil){
96 			syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
97 			filelen = -1;
98 			return;
99 		}
100 	}
101 	newlen = Bfilelen(b); /* side effect: rewinds b */
102 	if(newlen == filelen || Bage(b)<300)
103 		return;
104 	filelen = newlen;
105 	if(filelen < 0)
106 		return;
107 	if(nurl){ /* free existing tables */
108 		free(urlname[0]); /* arena */
109 		memset(urlhash,0,sizeof urlhash);
110 		memset(urlnext,0,sizeof urlnext);
111 		nurl = 0;
112 	}
113 	if(urlname==nil)
114 		urlname = (char**)ezalloc(URLmax*sizeof(*urlname));
115 	arena = (char*)ezalloc(filelen);  /* enough for all the strcpy below */
116 	i = 1;
117 	while((s=Brdline(b,'\n'))!=0){
118 		/* read lines of the form:  999 /url/path */
119 		n = Blinelen(b) - 1;
120 		if(n>2 && s[n]=='\n'){
121 			s[n] = '\0';
122 		}else{
123 			sysfatal("missing fields or newline in url-db");
124 		}
125 		j = strtoul(s,&s,10);
126 		while(*s==' ')
127 			s++;
128 		if(i++!=j)
129 			sysfatal("url-db synchronization error");
130 		url = hashstr(s);
131 		j = urllookup(url);
132 		if(j>=0)
133 			sysfatal("duplicate url");
134 		j = -j;
135 		nurl++;
136 		if(nurl>=URLmax){
137 			syslog(0, HTTPLOG, "urlinit overflow at %s",s);
138 			free(urlname[0]); /* arena */
139 			memset(urlhash,0,sizeof urlhash);
140 			memset(urlnext,0,sizeof urlnext);
141 			nurl = 0;
142 			return;
143 		}
144 		urltab[nurl] = url;
145 		urlnext[nurl] = urlhash[j];
146 		urlhash[j] = nurl;
147 		strcpy(arena,s);
148 		urlname[nurl] = arena;
149 		arena += strlen(s)+1;
150 	}
151 	syslog(0, HTTPLOG, "prefetch-hints url=%d (%.1fMB)", nurl, 1.e-6*(URLmax*sizeof(*urlname)+filelen));
152 	/* b is held open, because namespace will be chopped */
153 }
154 
155 void
156 statsinit(void)
157 {
158 	static Biobuf *b = nil;
159 	static vlong filelen = 0;
160 	vlong newlen;
161 	int iq, n, i, nstats = 0;
162 	uchar *s, buf[3+HINTmax*3];  /* iq, n, (url,prob)... */
163 	Hint *arena, *h;
164 	char *file;
165 	static void *oldarena = nil;
166 
167 	file = "/sys/log/httpd/pathstat";
168 	if(b == nil){
169 		if(filelen == -1)
170 			return; /* if failed first time */
171 		b = Bopen(file, OREAD); /* first time */
172 		if(b == nil){
173 			syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
174 			filelen = -1;
175 			return;
176 		}
177 	}
178 	newlen = Bfilelen(b); /* side effect: rewinds b */
179 	if(newlen == filelen || Bage(b)<300)
180 		return;
181 	filelen = newlen;
182 	if(oldarena){
183 		free(oldarena);
184 		memset(nhint,0,sizeof nhint);
185 	}
186 	arena = (Hint*)ezalloc((filelen/3)*sizeof(Hint));
187 	oldarena = arena;
188 	while(1){
189 		i = Bread(b,buf,3);
190 		if(i<3)
191 			break;
192 		nstats++;
193 		iq = buf[0];
194 		iq = (iq<<8) | buf[1];
195 		n = buf[2];
196 		h = arena;
197 		arena += n;
198 		hints[iq] = h;
199 		nhint[iq] = n;
200 		if(Bread(b,buf,3*n)!=3*n)
201 			sysfatal("stats read error");
202 		for(i=0; i<n; i++){
203 			s = &buf[3*i];
204 			h[i].url = (s[0]<<8) | s[1];
205 			h[i].prob = s[2];
206 		}
207 	}
208 	syslog(0, HTTPLOG, "prefetch-hints stats=%d (%.1fMB)", nstats, 1.e-6*((filelen/3)*sizeof(Hint)));
209 }
210 
211 void
212 urlcanon(char *url)
213 {
214 	/* all the changes here can be implemented by rewriting in-place */
215 	char *p, *q;
216 
217 	/* remove extraneous '/' in the middle and at the end */
218 	p = url+1;  /* first char needs no change */
219 	q = p;
220 	while(q[0]){
221 		if(q[0]=='/' && q[-1]=='/'){
222 			q++;
223 			continue;
224 		}
225 		*p++ = *q++;
226 	}
227 	if(q[-1]=='/'){  /* trailing '/' */
228 		p[-1] = '\0';
229 	}else{
230 		p[0] = '\0';
231 	}
232 
233 	/* specific to the cm.bell-labs.com web site */
234 	if(strncmp(url,"/cm/",4)==0){
235 		if(strchr("cims",url[4]) && strncmp(url+5,"s/who/",6)==0)
236 			/* strip off /cm/cs */
237 			memmove(url,url+6,strlen(url+6)+1);
238 		else if(strncmp(url+4,"ms/what/wavelet",15)==0)
239 			/* /cm/ms/what */
240 			memmove(url,url+11,strlen(url+11)+1);
241 	}
242 }
243 
244 void
245 hintprint(HConnect *hc, Hio *hout, char *uri, int thresh, int havej)
246 {
247 	int i, j, pr, prefix, fd, siz, havei, newhint = 0, n;
248 	char *query, *sf, etag[32], *wurl;
249 	Dir *dir;
250 	Hint *h, *haveh;
251 
252 	query = hstrdup(hc, uri);
253 	urlcanon(query);
254 	j = urllookup(hashstr(query));
255 	if(j < 0)
256 		return;
257 	query = strrchr(uri,'/');
258 	if(!query)
259 		return;  /* can't happen */
260 	prefix = query-uri+1;  /* = strlen(dirname)+1 */
261 	h = hints[j];
262 	for(i=0; i<nhint[j]; i++){
263 		if(havej > 0 && havej < URLmax){ /* exclude hints client has */
264 			haveh = hints[havej];
265 			for(havei=0; havei<nhint[havej]; havei++)
266 				if( haveh[havei].url == h[i].url)
267 					goto continuei;
268 		}
269 		sf = urlname[h[i].url];
270 		pr = h[i].prob;
271 		if(pr<thresh)
272 			break;
273 		n = strlen(webroot) + strlen(sf) + 1;
274 		wurl = halloc(hc, n);
275 		strcpy(wurl, webroot);
276 		strcat(wurl, sf);
277 		fd = open(wurl, OREAD);
278 		if(fd<0)
279 			continue;
280 		dir = dirfstat(fd);
281 		if(dir == nil){
282 			close(fd);
283 			continue;
284 		}
285 		close(fd);
286 		snprint(etag, sizeof(etag), "\"%lluxv%lux\"", dir->qid.path, dir->qid.vers);
287 		siz = (int)( log((double)dir->length) * RECIPLOG2 + 0.9999);
288 		free(dir);
289 		if(strncmp(uri,sf,prefix)==0 && strchr(sf+prefix,'/')==0 && sf[prefix]!=0)
290 			sf = sf+prefix;
291 		hprint(hout, "Fresh: %d,%s,%d,%s\r\n", pr, etag, siz, sf);
292 		newhint++;
293 continuei: ;
294 	}
295 	if(newhint)
296 		hprint(hout, "Fresh: have/%d\r\n", j);
297 }
298 
299