1 /* $NetBSD: rcache.c,v 1.4 1999/10/01 04:35:23 perseant Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Martin J. Laubach <mjl@emsi.priv.at> and 9 * Manuel Bouyer <Manuel.Bouyer@lip6.fr>. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 /*-----------------------------------------------------------------------*/ 40 #include <sys/types.h> 41 #include <sys/uio.h> 42 #include <sys/mman.h> 43 #include <sys/param.h> 44 #include <sys/sysctl.h> 45 #include <ufs/ufs/dinode.h> 46 47 #include <stdio.h> 48 #include <stdlib.h> 49 #include <unistd.h> 50 #include <fcntl.h> 51 #include <errno.h> 52 #include <string.h> 53 54 #include "dump.h" 55 56 /*-----------------------------------------------------------------------*/ 57 #define MAXCACHEBUFS 512 /* max 512 buffers */ 58 #define MAXMEMPART 6 /* max 15% of the user mem */ 59 60 /*-----------------------------------------------------------------------*/ 61 struct cheader { 62 volatile size_t count; 63 }; 64 65 struct cdesc { 66 volatile daddr_t blkstart; 67 volatile daddr_t blkend;/* start + nblksread */ 68 volatile daddr_t blocksRead; 69 volatile size_t time; 70 #ifdef DIAGNOSTICS 71 volatile pid_t owner; 72 #endif 73 }; 74 75 static int findlru __P((void)); 76 77 static void *shareBuffer = NULL; 78 static struct cheader *cheader; 79 static struct cdesc *cdesc; 80 static char *cdata; 81 static int cachebufs; 82 static int nblksread; 83 84 #ifdef STATS 85 static int nreads; 86 static int nphysread; 87 static int64_t readsize; 88 static int64_t physreadsize; 89 #endif 90 91 #define CDATA(i) (cdata + ((i) * nblksread * dev_bsize)) 92 93 /*-----------------------------------------------------------------------*/ 94 void 95 initcache(cachesize, readblksize) 96 int cachesize; 97 int readblksize; 98 { 99 size_t len; 100 size_t sharedSize; 101 102 nblksread = (readblksize + ufsib->ufs_bsize - 1) / ufsib->ufs_bsize; 103 if(cachesize == -1) { /* Compute from memory available */ 104 int usermem; 105 int mib[2] = { CTL_HW, HW_USERMEM }; 106 107 len = sizeof(usermem); 108 if (sysctl(mib, 2, &usermem, &len, NULL, 0) < 0) { 109 msg("sysctl(hw.usermem) failed: %s\n", strerror(errno)); 110 return; 111 } 112 cachebufs = (usermem / MAXMEMPART) / (nblksread * dev_bsize); 113 } else { /* User specified */ 114 cachebufs = cachesize; 115 } 116 117 if(cachebufs) { /* Don't allocate if zero --> no caching */ 118 if (cachebufs > MAXCACHEBUFS) 119 cachebufs = MAXCACHEBUFS; 120 121 sharedSize = sizeof(struct cheader) + 122 sizeof(struct cdesc) * cachebufs + 123 nblksread * cachebufs * dev_bsize; 124 #ifdef STATS 125 fprintf(stderr, "Using %d buffers (%d bytes)\n", cachebufs, 126 sharedSize); 127 #endif 128 shareBuffer = mmap(NULL, sharedSize, PROT_READ | PROT_WRITE, 129 MAP_ANON | MAP_SHARED, -1, 0); 130 if (shareBuffer == (void *)-1) { 131 msg("can't mmap shared memory for buffer: %s\n", 132 strerror(errno)); 133 return; 134 } 135 cheader = shareBuffer; 136 cdesc = (struct cdesc *) (((char *) shareBuffer) + 137 sizeof(struct cheader)); 138 cdata = ((char *) shareBuffer) + sizeof(struct cheader) + 139 sizeof(struct cdesc) * cachebufs; 140 141 memset(shareBuffer, '\0', sharedSize); 142 } 143 } 144 /*-----------------------------------------------------------------------*/ 145 /* Find the cache buffer descriptor that shows the minimal access time */ 146 147 static int 148 findlru() 149 { 150 int i; 151 int minTime = cdesc[0].time; 152 int minIdx = 0; 153 154 for (i = 0; i < cachebufs; i++) { 155 if (cdesc[i].time < minTime) { 156 minIdx = i; 157 minTime = cdesc[i].time; 158 } 159 } 160 161 return minIdx; 162 } 163 /*-----------------------------------------------------------------------*/ 164 /* 165 * Read data directly from disk, with smart error handling. 166 * Try to recover from hard errors by reading in sector sized pieces. 167 * Error recovery is attempted at most BREADEMAX times before seeking 168 * consent from the operator to continue. 169 */ 170 171 172 static int breaderrors = 0; 173 #define BREADEMAX 32 174 175 void 176 rawread(blkno, buf, size) 177 daddr_t blkno; 178 char *buf; 179 int size; 180 { 181 int cnt, i; 182 #ifdef STATS 183 nphysread++; 184 physreadsize += size; 185 #endif 186 187 if (lseek(diskfd, ((off_t) blkno << dev_bshift), 0) < 0) { 188 msg("rawread: lseek fails\n"); 189 goto err; 190 } 191 if ((cnt = read(diskfd, buf, size)) == size) 192 return; 193 if (cnt == -1) 194 msg("read error from %s: %s: [block %d]: count=%d\n", 195 disk, strerror(errno), blkno, size); 196 else 197 msg("short read error from %s: [block %d]: count=%d, got=%d\n", 198 disk, blkno, size, cnt); 199 err: 200 if (++breaderrors > BREADEMAX) { 201 msg("More than %d block read errors from %d\n", 202 BREADEMAX, disk); 203 broadcast("DUMP IS AILING!\n"); 204 msg("This is an unrecoverable error.\n"); 205 if (!query("Do you want to attempt to continue?")){ 206 dumpabort(0); 207 /*NOTREACHED*/ 208 } else 209 breaderrors = 0; 210 } 211 /* 212 * Zero buffer, then try to read each sector of buffer separately. 213 */ 214 memset(buf, 0, size); 215 for (i = 0; i < size; i += dev_bsize, buf += dev_bsize, blkno++) { 216 if (lseek(diskfd, ((off_t)blkno << dev_bshift), 0) < 0) { 217 msg("rawread: lseek2 fails: %s!\n", 218 strerror(errno)); 219 continue; 220 } 221 if ((cnt = read(diskfd, buf, (int)dev_bsize)) == dev_bsize) 222 continue; 223 if (cnt == -1) { 224 msg("read error from %s: %s: [sector %d]: count=%d: " 225 "%s\n", disk, strerror(errno), blkno, dev_bsize, 226 strerror(errno)); 227 continue; 228 } 229 msg("short read error from %s: [sector %d]: count=%d, got=%d\n", 230 disk, blkno, dev_bsize, cnt); 231 } 232 } 233 234 /*-----------------------------------------------------------------------*/ 235 #define min(a,b) (((a) < (b)) ? (a) : (b)) 236 237 void 238 bread(blkno, buf, size) 239 daddr_t blkno; 240 char *buf; 241 int size; 242 { 243 int osize = size; 244 daddr_t oblkno = blkno; 245 char *obuf = buf; 246 daddr_t numBlocks = (size + dev_bsize -1) / dev_bsize; 247 248 #ifdef STATS 249 nreads++; 250 readsize += size; 251 #endif 252 253 if (!shareBuffer) { 254 rawread(blkno, buf, size); 255 return; 256 } 257 258 if (flock(diskfd, LOCK_EX)) { 259 msg("flock(LOCK_EX) failed: %s\n", 260 strerror(errno)); 261 rawread(blkno, buf, size); 262 return; 263 } 264 265 266 retry: 267 while(size > 0) { 268 int i; 269 270 for (i = 0; i < cachebufs; i++) { 271 struct cdesc *curr = &cdesc[i]; 272 273 #ifdef DIAGNOSTICS 274 if (curr->owner) { 275 fprintf(stderr, "Owner is set (%d, me=%d), can" 276 "not happen.\n", curr->owner, getpid()); 277 } 278 #endif 279 280 if (curr->blkend == 0) 281 continue; 282 /* 283 * If we find a bit of the read in the buffers, 284 * now compute how many blocks we can copy, 285 * copy them out, adjust blkno, buf and size, 286 * and restart 287 */ 288 if (curr->blkstart <= blkno && 289 blkno < curr->blkend) { 290 /* Number of data blocks to be copied */ 291 int toCopy = min(size, 292 (curr->blkend - blkno) * dev_bsize); 293 #ifdef DIAGNOSTICS 294 if (toCopy <= 0 || 295 toCopy > nblksread * dev_bsize) { 296 fprintf(stderr, "toCopy %d !\n", 297 toCopy); 298 dumpabort(0); 299 } 300 if (CDATA(i) + (blkno - curr->blkstart) * 301 dev_bsize < CDATA(i) || 302 CDATA(i) + (blkno - curr->blkstart) * 303 dev_bsize > 304 CDATA(i) + nblksread * dev_bsize) { 305 fprintf(stderr, "%p < %p !!!\n", 306 CDATA(i) + (blkno - 307 curr->blkstart) * dev_bsize, 308 CDATA(i)); 309 fprintf(stderr, "cdesc[i].blkstart %d " 310 "blkno %d dev_bsize %ld\n", 311 curr->blkstart, blkno, dev_bsize); 312 dumpabort(0); 313 } 314 #endif 315 memcpy(buf, CDATA(i) + 316 (blkno - curr->blkstart) * dev_bsize, 317 toCopy); 318 319 buf += toCopy; 320 size -= toCopy; 321 blkno += (toCopy + dev_bsize - 1) / dev_bsize; 322 numBlocks -= 323 (toCopy + dev_bsize - 1) / dev_bsize; 324 325 curr->time = cheader->count++; 326 327 /* 328 * If all data of a cache block have been 329 * read, chances are good no more reads 330 * will occur, so expire the cache immediately 331 */ 332 333 curr->blocksRead += 334 (toCopy + dev_bsize -1) / dev_bsize; 335 if (curr->blocksRead >= nblksread) 336 curr->time = 0; 337 338 goto retry; 339 } 340 } 341 342 /* No more to do? */ 343 if (size == 0) 344 break; 345 346 /* 347 * This does actually not happen if fs blocks are not greater 348 * than nblksread. 349 */ 350 if (numBlocks > nblksread) { 351 rawread(oblkno, obuf, osize); 352 break; 353 } else { 354 int idx; 355 ssize_t rsize; 356 daddr_t blockBlkNo; 357 358 blockBlkNo = (blkno / nblksread) * nblksread; 359 idx = findlru(); 360 rsize = min(nblksread, 361 ufsib->ufs_dsize - blockBlkNo) * 362 dev_bsize; 363 364 #ifdef DIAGNOSTICS 365 if (cdesc[idx].owner) 366 fprintf(stderr, "Owner is set (%d, me=%d), can" 367 "not happen(2).\n", cdesc[idx].owner, 368 getpid()); 369 cdesc[idx].owner = getpid(); 370 #endif 371 cdesc[idx].time = cheader->count++; 372 cdesc[idx].blkstart = blockBlkNo; 373 cdesc[idx].blocksRead = 0; 374 375 if (lseek(diskfd, 376 ((off_t) (blockBlkNo) << dev_bshift), 0) < 0) { 377 msg("readBlocks: lseek fails: %s\n", 378 strerror(errno)); 379 rsize = -1; 380 } else { 381 rsize = read(diskfd, CDATA(idx), rsize); 382 if (rsize < 0) { 383 msg("readBlocks: read fails: %s\n", 384 strerror(errno)); 385 } 386 } 387 388 /* On errors, panic, punt, try to read without 389 * cache and let raw read routine do the rest. 390 */ 391 392 if (rsize <= 0) { 393 rawread(oblkno, obuf, osize); 394 #ifdef DIAGNOSTICS 395 if (cdesc[idx].owner != getpid()) 396 fprintf(stderr, "Owner changed from " 397 "%d to %d, can't happen\n", 398 getpid(), cdesc[idx].owner); 399 cdesc[idx].owner = 0; 400 #endif 401 break; 402 } 403 404 /* On short read, just note the fact and go on */ 405 cdesc[idx].blkend = blockBlkNo + rsize / dev_bsize; 406 407 #ifdef STATS 408 nphysread++; 409 physreadsize += rsize; 410 #endif 411 #ifdef DIAGNOSTICS 412 if (cdesc[idx].owner != getpid()) 413 fprintf(stderr, "Owner changed from " 414 "%d to %d, can't happen\n", 415 getpid(), cdesc[idx].owner); 416 cdesc[idx].owner = 0; 417 #endif 418 /* 419 * We swapped some of data in, let the loop fetch 420 * them from cache 421 */ 422 } 423 } 424 425 if (flock(diskfd, LOCK_UN)) 426 msg("flock(LOCK_UN) failed: %s\n", 427 strerror(errno)); 428 return; 429 } 430 431 /*-----------------------------------------------------------------------*/ 432 void 433 printcachestats() 434 { 435 #ifdef STATS 436 fprintf(stderr, "Pid %d: %d reads (%u bytes) " 437 "%d physical reads (%u bytes) %d%% hits, %d%% overhead\n", 438 getpid(), nreads, (u_int) readsize, nphysread, 439 (u_int) physreadsize, (nreads - nphysread) * 100 / nreads, 440 (int) (((physreadsize - readsize) * 100) / readsize)); 441 #endif 442 } 443 444 /*-----------------------------------------------------------------------*/ 445