xref: /netbsd-src/sbin/dump/rcache.c (revision 481fca6e59249d8ffcf24fef7cfbe7b131bfb080)
1 /*      $NetBSD: rcache.c,v 1.4 1999/10/01 04:35:23 perseant Exp $       */
2 
3 /*-
4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Martin J. Laubach <mjl@emsi.priv.at> and
9  *    Manuel Bouyer <Manuel.Bouyer@lip6.fr>.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *      This product includes software developed by the NetBSD
22  *      Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 /*-----------------------------------------------------------------------*/
40 #include <sys/types.h>
41 #include <sys/uio.h>
42 #include <sys/mman.h>
43 #include <sys/param.h>
44 #include <sys/sysctl.h>
45 #include <ufs/ufs/dinode.h>
46 
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <unistd.h>
50 #include <fcntl.h>
51 #include <errno.h>
52 #include <string.h>
53 
54 #include "dump.h"
55 
56 /*-----------------------------------------------------------------------*/
#define MAXCACHEBUFS	512	/* upper limit on the number of cache buffers */
#define MAXMEMPART	6	/* auto-sizing uses at most 1/6 of hw.usermem */
59 
60 /*-----------------------------------------------------------------------*/
/*
 * Header placed at the very start of the shared cache mapping.
 */
struct cheader {
	/* Running counter; its value is stamped into cdesc.time on each use. */
	volatile size_t count;
};
64 
/*
 * Descriptor for one cache buffer.  The descriptors form an array that
 * follows the cheader in the shared mapping; descriptor i describes the
 * data area CDATA(i).
 */
struct cdesc {
	volatile daddr_t blkstart;	/* first block held in the buffer */
	volatile daddr_t blkend;/* start + nblksread; 0 marks an unused buffer */
	volatile daddr_t blocksRead;	/* blocks handed out since the last fill */
	volatile size_t time;		/* stamp used by findlru(); 0 = reuse first */
#ifdef DIAGNOSTICS
	volatile pid_t owner;		/* pid currently filling the buffer, else 0 */
#endif
};
74 
static int findlru __P((void));

/*
 * Layout of the mapping created by initcache():
 *	struct cheader, cachebufs * struct cdesc, cachebufs * data buffers.
 */
static void *shareBuffer = NULL;	/* base of the mapping; NULL = no cache */
static struct cheader *cheader;		/* header at the start of the mapping */
static struct cdesc *cdesc;		/* descriptor array following the header */
static char *cdata;			/* first data buffer, after the descriptors */
static int cachebufs;			/* number of cache buffers */
static int nblksread;			/* blocks per cache buffer */

#ifdef STATS
/* Per-process counters reported by printcachestats(). */
static int nreads;			/* calls to bread() */
static int nphysread;			/* reads actually issued to the disk */
static int64_t readsize;		/* bytes requested through bread() */
static int64_t physreadsize;		/* bytes requested from the disk */
#endif

/* Address of the data area belonging to cache buffer i. */
#define CDATA(i)	(cdata + ((i) * nblksread * dev_bsize))
92 
93 /*-----------------------------------------------------------------------*/
94 void
95 initcache(cachesize, readblksize)
96 	int cachesize;
97 	int readblksize;
98 {
99 	size_t len;
100 	size_t  sharedSize;
101 
102 	nblksread = (readblksize + ufsib->ufs_bsize - 1) / ufsib->ufs_bsize;
103 	if(cachesize == -1) {	/* Compute from memory available */
104 		int usermem;
105 		int mib[2] = { CTL_HW, HW_USERMEM };
106 
107 		len = sizeof(usermem);
108 		if (sysctl(mib, 2, &usermem, &len, NULL, 0) < 0) {
109 			msg("sysctl(hw.usermem) failed: %s\n", strerror(errno));
110 			return;
111 		}
112 		cachebufs = (usermem / MAXMEMPART) / (nblksread * dev_bsize);
113 	} else {		/* User specified */
114 		cachebufs = cachesize;
115 	}
116 
117 	if(cachebufs) {	/* Don't allocate if zero --> no caching */
118 		if (cachebufs > MAXCACHEBUFS)
119 			cachebufs = MAXCACHEBUFS;
120 
121 		sharedSize = sizeof(struct cheader) +
122 	   	    sizeof(struct cdesc) * cachebufs +
123 	   	    nblksread * cachebufs * dev_bsize;
124 #ifdef STATS
125 		fprintf(stderr, "Using %d buffers (%d bytes)\n", cachebufs,
126 	   	    sharedSize);
127 #endif
128 		shareBuffer = mmap(NULL, sharedSize, PROT_READ | PROT_WRITE,
129 	   	    MAP_ANON | MAP_SHARED, -1, 0);
130 		if (shareBuffer == (void *)-1) {
131 			msg("can't mmap shared memory for buffer: %s\n",
132 			    strerror(errno));
133 			return;
134 		}
135 		cheader = shareBuffer;
136 		cdesc = (struct cdesc *) (((char *) shareBuffer) +
137 		    sizeof(struct cheader));
138 		cdata = ((char *) shareBuffer) + sizeof(struct cheader) +
139 	   	    sizeof(struct cdesc) * cachebufs;
140 
141 		memset(shareBuffer, '\0', sharedSize);
142 	}
143 }
144 /*-----------------------------------------------------------------------*/
145 /* Find the cache buffer descriptor that shows the minimal access time */
146 
147 static int
148 findlru()
149 {
150 	int     i;
151 	int     minTime = cdesc[0].time;
152 	int     minIdx = 0;
153 
154 	for (i = 0; i < cachebufs; i++) {
155 		if (cdesc[i].time < minTime) {
156 			minIdx = i;
157 			minTime = cdesc[i].time;
158 		}
159 	}
160 
161 	return minIdx;
162 }
163 /*-----------------------------------------------------------------------*/
164 /*
165  * Read data directly from disk, with smart error handling.
166  * Try to recover from hard errors by reading in sector sized pieces.
167  * Error recovery is attempted at most BREADEMAX times before seeking
168  * consent from the operator to continue.
169  */
170 
171 
172 static int breaderrors = 0;
173 #define BREADEMAX 32
174 
175 void
176 rawread(blkno, buf, size)
177 	daddr_t blkno;
178 	char *buf;
179 	int size;
180 {
181 	int cnt, i;
182 #ifdef STATS
183 	nphysread++;
184 	physreadsize += size;
185 #endif
186 
187 	if (lseek(diskfd, ((off_t) blkno << dev_bshift), 0) < 0) {
188 		msg("rawread: lseek fails\n");
189 		goto err;
190 	}
191 	if ((cnt =  read(diskfd, buf, size)) == size)
192 		return;
193 	if (cnt == -1)
194 		msg("read error from %s: %s: [block %d]: count=%d\n",
195 			disk, strerror(errno), blkno, size);
196 	else
197 		msg("short read error from %s: [block %d]: count=%d, got=%d\n",
198 			disk, blkno, size, cnt);
199 err:
200 	if (++breaderrors > BREADEMAX) {
201 		msg("More than %d block read errors from %d\n",
202 			BREADEMAX, disk);
203 		broadcast("DUMP IS AILING!\n");
204 		msg("This is an unrecoverable error.\n");
205 		if (!query("Do you want to attempt to continue?")){
206 			dumpabort(0);
207 			/*NOTREACHED*/
208 		} else
209 			breaderrors = 0;
210 	}
211 	/*
212 	 * Zero buffer, then try to read each sector of buffer separately.
213 	 */
214 	memset(buf, 0, size);
215 	for (i = 0; i < size; i += dev_bsize, buf += dev_bsize, blkno++) {
216 		if (lseek(diskfd, ((off_t)blkno << dev_bshift), 0) < 0) {
217 			msg("rawread: lseek2 fails: %s!\n",
218 			    strerror(errno));
219 			continue;
220 		}
221 		if ((cnt = read(diskfd, buf, (int)dev_bsize)) == dev_bsize)
222 			continue;
223 		if (cnt == -1) {
224 			msg("read error from %s: %s: [sector %d]: count=%d: "
225 			    "%s\n", disk, strerror(errno), blkno, dev_bsize,
226 			    strerror(errno));
227 			continue;
228 		}
229 		msg("short read error from %s: [sector %d]: count=%d, got=%d\n",
230 		    disk, blkno, dev_bsize, cnt);
231 	}
232 }
233 
234 /*-----------------------------------------------------------------------*/
235 #define min(a,b)	(((a) < (b)) ? (a) : (b))
236 
237 void
238 bread(blkno, buf, size)
239 	daddr_t blkno;
240 	char *buf;
241 	int size;
242 {
243 	int     osize = size;
244 	daddr_t oblkno = blkno;
245 	char   *obuf = buf;
246 	daddr_t numBlocks = (size + dev_bsize -1) / dev_bsize;
247 
248 #ifdef STATS
249 	nreads++;
250 	readsize += size;
251 #endif
252 
253 	if (!shareBuffer) {
254 		rawread(blkno, buf, size);
255 		return;
256 	}
257 
258 	if (flock(diskfd, LOCK_EX)) {
259 		msg("flock(LOCK_EX) failed: %s\n",
260 		    strerror(errno));
261 		rawread(blkno, buf, size);
262 		return;
263 	}
264 
265 
266 retry:
267 	while(size > 0) {
268 		int     i;
269 
270 		for (i = 0; i < cachebufs; i++) {
271 			struct cdesc *curr = &cdesc[i];
272 
273 #ifdef DIAGNOSTICS
274 			if (curr->owner) {
275 				fprintf(stderr, "Owner is set (%d, me=%d), can"
276 				    "not happen.\n", curr->owner, getpid());
277 			}
278 #endif
279 
280 			if (curr->blkend == 0)
281 				continue;
282 			/*
283 			 * If we find a bit of the read in the buffers,
284 			 * now compute how many blocks we can copy,
285 			 * copy them out, adjust blkno, buf and size,
286 			 * and restart
287 			 */
288 			if (curr->blkstart <= blkno &&
289 			    blkno < curr->blkend) {
290 				/* Number of data blocks to be copied */
291 				int toCopy = min(size,
292 				    (curr->blkend - blkno) * dev_bsize);
293 #ifdef DIAGNOSTICS
294 				if (toCopy <= 0 ||
295 				    toCopy > nblksread * dev_bsize) {
296 					fprintf(stderr, "toCopy %d !\n",
297 					    toCopy);
298 					dumpabort(0);
299 				}
300 				if (CDATA(i) + (blkno - curr->blkstart) *
301 			   	    dev_bsize < CDATA(i) ||
302 			   	    CDATA(i) + (blkno - curr->blkstart) *
303 			   	    dev_bsize >
304 				    CDATA(i) + nblksread * dev_bsize) {
305 					fprintf(stderr, "%p < %p !!!\n",
306 				   	   CDATA(i) + (blkno -
307 						curr->blkstart) * dev_bsize,
308 					   CDATA(i));
309 					fprintf(stderr, "cdesc[i].blkstart %d "
310 					    "blkno %d dev_bsize %ld\n",
311 				   	    curr->blkstart, blkno, dev_bsize);
312 					dumpabort(0);
313 				}
314 #endif
315 				memcpy(buf, CDATA(i) +
316 				    (blkno - curr->blkstart) * dev_bsize,
317 			   	    toCopy);
318 
319 				buf 	+= toCopy;
320 				size 	-= toCopy;
321 				blkno 	+= (toCopy + dev_bsize - 1) / dev_bsize;
322 				numBlocks -=
323 				    (toCopy  + dev_bsize - 1) / dev_bsize;
324 
325 				curr->time = cheader->count++;
326 
327 				/*
328 				 * If all data of a cache block have been
329 				 * read, chances are good no more reads
330 				 * will occur, so expire the cache immediately
331 				 */
332 
333 				curr->blocksRead +=
334 				    (toCopy + dev_bsize -1) / dev_bsize;
335 				if (curr->blocksRead >= nblksread)
336 					curr->time = 0;
337 
338 				goto retry;
339 			}
340 		}
341 
342 		/* No more to do? */
343 		if (size == 0)
344 			break;
345 
346 		/*
347 		 * This does actually not happen if fs blocks are not greater
348 		 * than nblksread.
349 		 */
350 		if (numBlocks > nblksread) {
351 			rawread(oblkno, obuf, osize);
352 			break;
353 		} else {
354 			int     idx;
355 			ssize_t rsize;
356 			daddr_t blockBlkNo;
357 
358 			blockBlkNo = (blkno / nblksread) * nblksread;
359 			idx = findlru();
360 			rsize = min(nblksread,
361 			    ufsib->ufs_dsize - blockBlkNo) *
362 			    dev_bsize;
363 
364 #ifdef DIAGNOSTICS
365 			if (cdesc[idx].owner)
366 				fprintf(stderr, "Owner is set (%d, me=%d), can"
367 				    "not happen(2).\n", cdesc[idx].owner,
368 				    getpid());
369 			cdesc[idx].owner = getpid();
370 #endif
371 			cdesc[idx].time = cheader->count++;
372 			cdesc[idx].blkstart = blockBlkNo;
373 			cdesc[idx].blocksRead = 0;
374 
375 			if (lseek(diskfd,
376 			    ((off_t) (blockBlkNo) << dev_bshift), 0) < 0) {
377 				msg("readBlocks: lseek fails: %s\n",
378 				    strerror(errno));
379 				rsize = -1;
380 			} else {
381 				rsize = read(diskfd, CDATA(idx), rsize);
382 				if (rsize < 0) {
383 					msg("readBlocks: read fails: %s\n",
384 					    strerror(errno));
385 				}
386 			}
387 
388 			/* On errors, panic, punt, try to read without
389 			 * cache and let raw read routine do the rest.
390 			 */
391 
392 			if (rsize <= 0) {
393 				rawread(oblkno, obuf, osize);
394 #ifdef DIAGNOSTICS
395 				if (cdesc[idx].owner != getpid())
396 					fprintf(stderr, "Owner changed from "
397 					    "%d to %d, can't happen\n",
398 					    getpid(), cdesc[idx].owner);
399 				cdesc[idx].owner = 0;
400 #endif
401 				break;
402 			}
403 
404 			/* On short read, just note the fact and go on */
405 			cdesc[idx].blkend = blockBlkNo + rsize / dev_bsize;
406 
407 #ifdef STATS
408 			nphysread++;
409 			physreadsize += rsize;
410 #endif
411 #ifdef DIAGNOSTICS
412 			if (cdesc[idx].owner != getpid())
413 				fprintf(stderr, "Owner changed from "
414 				    "%d to %d, can't happen\n",
415 				    getpid(), cdesc[idx].owner);
416 			cdesc[idx].owner = 0;
417 #endif
418 			/*
419 			 * We swapped some of data in, let the loop fetch
420 			 * them from cache
421 			 */
422 		}
423 	}
424 
425 	if (flock(diskfd, LOCK_UN))
426 		msg("flock(LOCK_UN) failed: %s\n",
427 		    strerror(errno));
428 	return;
429 }
430 
431 /*-----------------------------------------------------------------------*/
/*
 * Print this process's cache statistics to stderr.
 *
 * Does nothing unless the file is compiled with STATS.  The hit and
 * overhead percentages are reported as 0 when no reads (or no bytes)
 * were requested, so a process that did no reading cannot divide by
 * zero.  Byte counts are printed at their full 64-bit width.
 */
void
printcachestats()
{
#ifdef STATS
	int hits = 0;
	int overhead = 0;

	if (nreads != 0)
		hits = (nreads - nphysread) * 100 / nreads;
	if (readsize != 0)
		overhead = (int) (((physreadsize - readsize) * 100) /
		    readsize);

	fprintf(stderr, "Pid %d: %d reads (%lld bytes) "
	    "%d physical reads (%lld bytes) %d%% hits, %d%% overhead\n",
	    (int) getpid(), nreads, (long long) readsize, nphysread,
	    (long long) physreadsize, hits, overhead);
#endif
}
443 
444 /*-----------------------------------------------------------------------*/
445