1 /* $NetBSD: rcache.c,v 1.6 2001/05/27 14:17:57 lukem Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Martin J. Laubach <mjl@emsi.priv.at> and 9 * Manuel Bouyer <Manuel.Bouyer@lip6.fr>. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 /*-----------------------------------------------------------------------*/ 40 #include <sys/types.h> 41 #include <sys/uio.h> 42 #include <sys/mman.h> 43 #include <sys/param.h> 44 #include <sys/sysctl.h> 45 #include <ufs/ufs/dinode.h> 46 47 #include <stdio.h> 48 #include <stdlib.h> 49 #include <unistd.h> 50 #include <fcntl.h> 51 #include <errno.h> 52 #include <string.h> 53 54 #include "dump.h" 55 56 /*-----------------------------------------------------------------------*/ 57 #define MAXCACHEBUFS 512 /* max 512 buffers */ 58 #define MAXMEMPART 6 /* max 15% of the user mem */ 59 60 /*-----------------------------------------------------------------------*/ 61 struct cheader { 62 volatile size_t count; 63 }; 64 65 struct cdesc { 66 volatile daddr_t blkstart; 67 volatile daddr_t blkend;/* start + nblksread */ 68 volatile daddr_t blocksRead; 69 volatile size_t time; 70 #ifdef DIAGNOSTICS 71 volatile pid_t owner; 72 #endif 73 }; 74 75 static int findlru(void); 76 77 static void *shareBuffer = NULL; 78 static struct cheader *cheader; 79 static struct cdesc *cdesc; 80 static char *cdata; 81 static int cachebufs; 82 static int nblksread; 83 84 #ifdef STATS 85 static int nreads; 86 static int nphysread; 87 static int64_t readsize; 88 static int64_t physreadsize; 89 #endif 90 91 #define CDATA(i) (cdata + ((i) * nblksread * dev_bsize)) 92 93 void 94 initcache(int cachesize, int readblksize) 95 { 96 size_t len; 97 size_t sharedSize; 98 99 nblksread = (readblksize + ufsib->ufs_bsize - 1) / ufsib->ufs_bsize; 100 if(cachesize == -1) { /* Compute from memory available */ 101 int usermem; 102 int mib[2] = { CTL_HW, HW_USERMEM }; 103 104 len = sizeof(usermem); 105 if (sysctl(mib, 2, &usermem, &len, NULL, 0) < 0) { 106 msg("sysctl(hw.usermem) failed: %s\n", strerror(errno)); 107 return; 108 } 109 cachebufs = (usermem / MAXMEMPART) / (nblksread * dev_bsize); 110 } else { /* User specified */ 111 cachebufs = cachesize; 112 } 113 114 if(cachebufs) { /* Don't allocate if zero --> no caching */ 115 if (cachebufs > MAXCACHEBUFS) 116 cachebufs = MAXCACHEBUFS; 117 118 sharedSize = sizeof(struct cheader) + 119 sizeof(struct cdesc) * cachebufs + 120 nblksread * cachebufs * dev_bsize; 121 #ifdef STATS 122 fprintf(stderr, "Using %d buffers (%d bytes)\n", cachebufs, 123 sharedSize); 124 #endif 125 shareBuffer = mmap(NULL, sharedSize, PROT_READ | PROT_WRITE, 126 MAP_ANON | MAP_SHARED, -1, 0); 127 if (shareBuffer == (void *)-1) { 128 msg("can't mmap shared memory for buffer: %s\n", 129 strerror(errno)); 130 return; 131 } 132 cheader = shareBuffer; 133 cdesc = (struct cdesc *) (((char *) shareBuffer) + 134 sizeof(struct cheader)); 135 cdata = ((char *) shareBuffer) + sizeof(struct cheader) + 136 sizeof(struct cdesc) * cachebufs; 137 138 memset(shareBuffer, '\0', sharedSize); 139 } 140 } 141 142 /* 143 * Find the cache buffer descriptor that shows the minimal access time 144 */ 145 static int 146 findlru(void) 147 { 148 int i; 149 int minTime = cdesc[0].time; 150 int minIdx = 0; 151 152 for (i = 0; i < cachebufs; i++) { 153 if (cdesc[i].time < minTime) { 154 minIdx = i; 155 minTime = cdesc[i].time; 156 } 157 } 158 159 return minIdx; 160 } 161 162 /* 163 * Read data directly from disk, with smart error handling. 164 * Try to recover from hard errors by reading in sector sized pieces. 165 * Error recovery is attempted at most BREADEMAX times before seeking 166 * consent from the operator to continue. 167 */ 168 169 static int breaderrors = 0; 170 #define BREADEMAX 32 171 172 void 173 rawread(daddr_t blkno, char *buf, int size) 174 { 175 int cnt, i; 176 #ifdef STATS 177 nphysread++; 178 physreadsize += size; 179 #endif 180 181 if (lseek(diskfd, ((off_t) blkno << dev_bshift), 0) < 0) { 182 msg("rawread: lseek fails\n"); 183 goto err; 184 } 185 if ((cnt = read(diskfd, buf, size)) == size) 186 return; 187 if (cnt == -1) 188 msg("read error from %s: %s: [block %d]: count=%d\n", 189 disk, strerror(errno), blkno, size); 190 else 191 msg("short read error from %s: [block %d]: count=%d, got=%d\n", 192 disk, blkno, size, cnt); 193 err: 194 if (++breaderrors > BREADEMAX) { 195 msg("More than %d block read errors from %s\n", 196 BREADEMAX, disk); 197 broadcast("DUMP IS AILING!\n"); 198 msg("This is an unrecoverable error.\n"); 199 if (!query("Do you want to attempt to continue?")){ 200 dumpabort(0); 201 /*NOTREACHED*/ 202 } else 203 breaderrors = 0; 204 } 205 /* 206 * Zero buffer, then try to read each sector of buffer separately. 207 */ 208 memset(buf, 0, size); 209 for (i = 0; i < size; i += dev_bsize, buf += dev_bsize, blkno++) { 210 if (lseek(diskfd, ((off_t)blkno << dev_bshift), 0) < 0) { 211 msg("rawread: lseek2 fails: %s!\n", 212 strerror(errno)); 213 continue; 214 } 215 if ((cnt = read(diskfd, buf, (int)dev_bsize)) == dev_bsize) 216 continue; 217 if (cnt == -1) { 218 msg("read error from %s: %s: [sector %d]: count=%ld: " 219 "%s\n", disk, strerror(errno), blkno, dev_bsize, 220 strerror(errno)); 221 continue; 222 } 223 msg("short read error from %s: [sector %d]: count=%ld, got=%d\n", 224 disk, blkno, dev_bsize, cnt); 225 } 226 } 227 228 void 229 bread(daddr_t blkno, char *buf, int size) 230 { 231 int osize = size; 232 daddr_t oblkno = blkno; 233 char *obuf = buf; 234 daddr_t numBlocks = (size + dev_bsize -1) / dev_bsize; 235 236 #ifdef STATS 237 nreads++; 238 readsize += size; 239 #endif 240 241 if (!shareBuffer) { 242 rawread(blkno, buf, size); 243 return; 244 } 245 246 if (flock(diskfd, LOCK_EX)) { 247 msg("flock(LOCK_EX) failed: %s\n", 248 strerror(errno)); 249 rawread(blkno, buf, size); 250 return; 251 } 252 253 254 retry: 255 while(size > 0) { 256 int i; 257 258 for (i = 0; i < cachebufs; i++) { 259 struct cdesc *curr = &cdesc[i]; 260 261 #ifdef DIAGNOSTICS 262 if (curr->owner) { 263 fprintf(stderr, "Owner is set (%d, me=%d), can" 264 "not happen.\n", curr->owner, getpid()); 265 } 266 #endif 267 268 if (curr->blkend == 0) 269 continue; 270 /* 271 * If we find a bit of the read in the buffers, 272 * now compute how many blocks we can copy, 273 * copy them out, adjust blkno, buf and size, 274 * and restart 275 */ 276 if (curr->blkstart <= blkno && 277 blkno < curr->blkend) { 278 /* Number of data blocks to be copied */ 279 int toCopy = MIN(size, 280 (curr->blkend - blkno) * dev_bsize); 281 #ifdef DIAGNOSTICS 282 if (toCopy <= 0 || 283 toCopy > nblksread * dev_bsize) { 284 fprintf(stderr, "toCopy %d !\n", 285 toCopy); 286 dumpabort(0); 287 } 288 if (CDATA(i) + (blkno - curr->blkstart) * 289 dev_bsize < CDATA(i) || 290 CDATA(i) + (blkno - curr->blkstart) * 291 dev_bsize > 292 CDATA(i) + nblksread * dev_bsize) { 293 fprintf(stderr, "%p < %p !!!\n", 294 CDATA(i) + (blkno - 295 curr->blkstart) * dev_bsize, 296 CDATA(i)); 297 fprintf(stderr, "cdesc[i].blkstart %d " 298 "blkno %d dev_bsize %ld\n", 299 curr->blkstart, blkno, dev_bsize); 300 dumpabort(0); 301 } 302 #endif 303 memcpy(buf, CDATA(i) + 304 (blkno - curr->blkstart) * dev_bsize, 305 toCopy); 306 307 buf += toCopy; 308 size -= toCopy; 309 blkno += (toCopy + dev_bsize - 1) / dev_bsize; 310 numBlocks -= 311 (toCopy + dev_bsize - 1) / dev_bsize; 312 313 curr->time = cheader->count++; 314 315 /* 316 * If all data of a cache block have been 317 * read, chances are good no more reads 318 * will occur, so expire the cache immediately 319 */ 320 321 curr->blocksRead += 322 (toCopy + dev_bsize -1) / dev_bsize; 323 if (curr->blocksRead >= nblksread) 324 curr->time = 0; 325 326 goto retry; 327 } 328 } 329 330 /* No more to do? */ 331 if (size == 0) 332 break; 333 334 /* 335 * This does actually not happen if fs blocks are not greater 336 * than nblksread. 337 */ 338 if (numBlocks > nblksread) { 339 rawread(oblkno, obuf, osize); 340 break; 341 } else { 342 int idx; 343 ssize_t rsize; 344 daddr_t blockBlkNo; 345 346 blockBlkNo = (blkno / nblksread) * nblksread; 347 idx = findlru(); 348 rsize = MIN(nblksread, 349 ufsib->ufs_dsize - blockBlkNo) * 350 dev_bsize; 351 352 #ifdef DIAGNOSTICS 353 if (cdesc[idx].owner) 354 fprintf(stderr, "Owner is set (%d, me=%d), can" 355 "not happen(2).\n", cdesc[idx].owner, 356 getpid()); 357 cdesc[idx].owner = getpid(); 358 #endif 359 cdesc[idx].time = cheader->count++; 360 cdesc[idx].blkstart = blockBlkNo; 361 cdesc[idx].blocksRead = 0; 362 363 if (lseek(diskfd, 364 ((off_t) (blockBlkNo) << dev_bshift), 0) < 0) { 365 msg("readBlocks: lseek fails: %s\n", 366 strerror(errno)); 367 rsize = -1; 368 } else { 369 rsize = read(diskfd, CDATA(idx), rsize); 370 if (rsize < 0) { 371 msg("readBlocks: read fails: %s\n", 372 strerror(errno)); 373 } 374 } 375 376 /* On errors, panic, punt, try to read without 377 * cache and let raw read routine do the rest. 378 */ 379 380 if (rsize <= 0) { 381 rawread(oblkno, obuf, osize); 382 #ifdef DIAGNOSTICS 383 if (cdesc[idx].owner != getpid()) 384 fprintf(stderr, "Owner changed from " 385 "%d to %d, can't happen\n", 386 getpid(), cdesc[idx].owner); 387 cdesc[idx].owner = 0; 388 #endif 389 break; 390 } 391 392 /* On short read, just note the fact and go on */ 393 cdesc[idx].blkend = blockBlkNo + rsize / dev_bsize; 394 395 #ifdef STATS 396 nphysread++; 397 physreadsize += rsize; 398 #endif 399 #ifdef DIAGNOSTICS 400 if (cdesc[idx].owner != getpid()) 401 fprintf(stderr, "Owner changed from " 402 "%d to %d, can't happen\n", 403 getpid(), cdesc[idx].owner); 404 cdesc[idx].owner = 0; 405 #endif 406 /* 407 * We swapped some of data in, let the loop fetch 408 * them from cache 409 */ 410 } 411 } 412 413 if (flock(diskfd, LOCK_UN)) 414 msg("flock(LOCK_UN) failed: %s\n", 415 strerror(errno)); 416 return; 417 } 418 419 void 420 printcachestats(void) 421 { 422 #ifdef STATS 423 fprintf(stderr, "Pid %d: %d reads (%u bytes) " 424 "%d physical reads (%u bytes) %d%% hits, %d%% overhead\n", 425 getpid(), nreads, (u_int) readsize, nphysread, 426 (u_int) physreadsize, (nreads - nphysread) * 100 / nreads, 427 (int) (((physreadsize - readsize) * 100) / readsize)); 428 #endif 429 } 430