1 /* $NetBSD: efs_subr.c,v 1.7 2008/05/16 09:21:59 hannken Exp $ */ 2 3 /* 4 * Copyright (c) 2006 Stephen M. Rumble <rumble@ephemeral.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/cdefs.h> 20 __KERNEL_RCSID(0, "$NetBSD: efs_subr.c,v 1.7 2008/05/16 09:21:59 hannken Exp $"); 21 22 #include <sys/param.h> 23 #include <sys/kauth.h> 24 #include <sys/lwp.h> 25 #include <sys/proc.h> 26 #include <sys/buf.h> 27 #include <sys/mount.h> 28 #include <sys/vnode.h> 29 #include <sys/namei.h> 30 #include <sys/stat.h> 31 #include <sys/malloc.h> 32 33 #include <miscfs/genfs/genfs_node.h> 34 35 #include <fs/efs/efs.h> 36 #include <fs/efs/efs_sb.h> 37 #include <fs/efs/efs_dir.h> 38 #include <fs/efs/efs_genfs.h> 39 #include <fs/efs/efs_mount.h> 40 #include <fs/efs/efs_extent.h> 41 #include <fs/efs/efs_dinode.h> 42 #include <fs/efs/efs_inode.h> 43 #include <fs/efs/efs_subr.h> 44 45 struct pool efs_inode_pool; 46 47 /* 48 * Calculate a checksum for the provided superblock in __host byte order__. 49 * 50 * At some point SGI changed the checksum algorithm slightly, which can be 51 * enabled with the 'new' flag. 52 * 53 * Presumably this change occured on or before 24 Oct 1988 (around IRIX 3.1), 54 * so we're pretty unlikely to ever actually see an old checksum. Further, it 55 * means that EFS_NEWMAGIC filesystems (IRIX >= 3.3) must match the new 56 * checksum whereas EFS_MAGIC filesystems could potentially use either 57 * algorithm. 58 * 59 * See comp.sys.sgi <1991Aug9.050838.16876@odin.corp.sgi.com> 60 */ 61 int32_t 62 efs_sb_checksum(struct efs_sb *esb, int new) 63 { 64 int i; 65 int32_t cksum; 66 uint16_t *sbarray = (uint16_t *)esb; 67 68 KASSERT((EFS_SB_CHECKSUM_SIZE % 2) == 0); 69 70 for (i = cksum = 0; i < (EFS_SB_CHECKSUM_SIZE / 2); i++) { 71 cksum ^= be16toh(sbarray[i]); 72 cksum = (cksum << 1) | (new && cksum < 0); 73 } 74 75 return (cksum); 76 } 77 78 /* 79 * Determine if the superblock is valid. 80 * 81 * Returns 0 if valid, else invalid. If invalid, 'why' is set to an 82 * explanation. 83 */ 84 int 85 efs_sb_validate(struct efs_sb *esb, const char **why) 86 { 87 uint32_t ocksum, ncksum; 88 89 *why = NULL; 90 91 if (be32toh(esb->sb_magic) != EFS_SB_MAGIC && 92 be32toh(esb->sb_magic) != EFS_SB_NEWMAGIC) { 93 *why = "sb_magic invalid"; 94 return (1); 95 } 96 97 ocksum = htobe32(efs_sb_checksum(esb, 0)); 98 ncksum = htobe32(efs_sb_checksum(esb, 1)); 99 if (esb->sb_checksum != ocksum && esb->sb_checksum != ncksum) { 100 *why = "sb_checksum invalid"; 101 return (1); 102 } 103 104 if (be32toh(esb->sb_size) > EFS_SIZE_MAX) { 105 *why = "sb_size > EFS_SIZE_MAX"; 106 return (1); 107 } 108 109 if (be32toh(esb->sb_firstcg) <= EFS_BB_BITMAP) { 110 *why = "sb_firstcg <= EFS_BB_BITMAP"; 111 return (1); 112 } 113 114 /* XXX - add better sb consistency checks here */ 115 if (esb->sb_cgfsize == 0 || 116 esb->sb_cgisize == 0 || 117 esb->sb_ncg == 0 || 118 esb->sb_bmsize == 0) { 119 *why = "something bad happened"; 120 return (1); 121 } 122 123 return (0); 124 } 125 126 /* 127 * Determine the basic block offset and inode index within that block, given 128 * the inode 'ino' and filesystem parameters _in host byte order_. The inode 129 * will live at byte address 'bboff' * EFS_BB_SIZE + 'index' * EFS_DINODE_SIZE. 130 */ 131 void 132 efs_locate_inode(ino_t ino, struct efs_sb *sbp, uint32_t *bboff, int *index) 133 { 134 uint32_t cgfsize, firstcg; 135 uint16_t cgisize; 136 137 cgisize = be16toh(sbp->sb_cgisize); 138 cgfsize = be32toh(sbp->sb_cgfsize); 139 firstcg = be32toh(sbp->sb_firstcg), 140 141 *bboff = firstcg + ((ino / (cgisize * EFS_DINODES_PER_BB)) * cgfsize) + 142 ((ino % (cgisize * EFS_DINODES_PER_BB)) / EFS_DINODES_PER_BB); 143 *index = ino & (EFS_DINODES_PER_BB - 1); 144 } 145 146 /* 147 * Read in an inode from disk. 148 * 149 * We actually take in four inodes at a time. Hopefully these will stick 150 * around in the buffer cache and get used without going to disk. 151 * 152 * Returns 0 on success. 153 */ 154 int 155 efs_read_inode(struct efs_mount *emp, ino_t ino, struct lwp *l, 156 struct efs_dinode *di) 157 { 158 struct efs_sb *sbp; 159 struct buf *bp; 160 int index, err; 161 uint32_t bboff; 162 163 sbp = &emp->em_sb; 164 efs_locate_inode(ino, sbp, &bboff, &index); 165 166 err = efs_bread(emp, bboff, l, &bp); 167 if (err) { 168 brelse(bp, 0); 169 return (err); 170 } 171 memcpy(di, ((struct efs_dinode *)bp->b_data) + index, sizeof(*di)); 172 brelse(bp, 0); 173 174 return (0); 175 } 176 177 /* 178 * Perform a read from our device handling the potential DEV_BSIZE 179 * messiness (although as of 19.2.2006, all ports appear to use 512) as 180 * we as EFS block sizing. 181 * 182 * bboff: basic block offset 183 * 184 * Returns 0 on success. 185 */ 186 int 187 efs_bread(struct efs_mount *emp, uint32_t bboff, struct lwp *l, struct buf **bp) 188 { 189 KASSERT(bboff < EFS_SIZE_MAX); 190 191 return (bread(emp->em_devvp, (daddr_t)bboff * (EFS_BB_SIZE / DEV_BSIZE), 192 EFS_BB_SIZE, (l == NULL) ? NOCRED : l->l_cred, 0, bp)); 193 } 194 195 /* 196 * Synchronise the in-core, host ordered and typed inode fields with their 197 * corresponding on-disk, EFS ordered and typed copies. 198 * 199 * This is the inverse of efs_dinode_sync_inode(), and should be called when 200 * an inode is loaded from disk. 201 */ 202 void 203 efs_sync_dinode_to_inode(struct efs_inode *ei) 204 { 205 206 ei->ei_mode = be16toh(ei->ei_di.di_mode); /*same as nbsd*/ 207 ei->ei_nlink = be16toh(ei->ei_di.di_nlink); 208 ei->ei_uid = be16toh(ei->ei_di.di_uid); 209 ei->ei_gid = be16toh(ei->ei_di.di_gid); 210 ei->ei_size = be32toh(ei->ei_di.di_size); 211 ei->ei_atime = be32toh(ei->ei_di.di_atime); 212 ei->ei_mtime = be32toh(ei->ei_di.di_mtime); 213 ei->ei_ctime = be32toh(ei->ei_di.di_ctime); 214 ei->ei_gen = be32toh(ei->ei_di.di_gen); 215 ei->ei_numextents = be16toh(ei->ei_di.di_numextents); 216 ei->ei_version = ei->ei_di.di_version; 217 } 218 219 /* 220 * Synchronise the on-disk, EFS ordered and typed inode fields with their 221 * corresponding in-core, host ordered and typed copies. 222 * 223 * This is the inverse of efs_inode_sync_dinode(), and should be called before 224 * an inode is flushed to disk. 225 */ 226 void 227 efs_sync_inode_to_dinode(struct efs_inode *ei) 228 { 229 230 panic("readonly -- no need to call me"); 231 } 232 233 #ifdef DIAGNOSTIC 234 /* 235 * Ensure that the in-core inode's host cached fields match its on-disk copy. 236 * 237 * Returns 0 if they match. 238 */ 239 static int 240 efs_is_inode_synced(struct efs_inode *ei) 241 { 242 int s; 243 244 s = 0; 245 /* XXX -- see above remarks about assumption */ 246 s += (ei->ei_mode != be16toh(ei->ei_di.di_mode)); 247 s += (ei->ei_nlink != be16toh(ei->ei_di.di_nlink)); 248 s += (ei->ei_uid != be16toh(ei->ei_di.di_uid)); 249 s += (ei->ei_gid != be16toh(ei->ei_di.di_gid)); 250 s += (ei->ei_size != be32toh(ei->ei_di.di_size)); 251 s += (ei->ei_atime != be32toh(ei->ei_di.di_atime)); 252 s += (ei->ei_mtime != be32toh(ei->ei_di.di_mtime)); 253 s += (ei->ei_ctime != be32toh(ei->ei_di.di_ctime)); 254 s += (ei->ei_gen != be32toh(ei->ei_di.di_gen)); 255 s += (ei->ei_numextents != be16toh(ei->ei_di.di_numextents)); 256 s += (ei->ei_version != ei->ei_di.di_version); 257 258 return (s); 259 } 260 #endif 261 262 /* 263 * Given an efs_dirblk structure and a componentname to search for, return the 264 * corresponding inode if it is found. 265 * 266 * Returns 0 on success. 267 */ 268 static int 269 efs_dirblk_lookup(struct efs_dirblk *dir, struct componentname *cn, 270 ino_t *inode) 271 { 272 struct efs_dirent *de; 273 int i, slot, offset; 274 275 KASSERT(cn->cn_namelen <= EFS_DIRENT_NAMELEN_MAX); 276 277 slot = offset = 0; 278 279 for (i = 0; i < dir->db_slots; i++) { 280 offset = EFS_DIRENT_OFF_EXPND(dir->db_space[i]); 281 282 if (offset == EFS_DIRBLK_SLOT_FREE) 283 continue; 284 285 de = (struct efs_dirent *)((char *)dir + offset); 286 if (de->de_namelen == cn->cn_namelen && 287 (strncmp(cn->cn_nameptr, de->de_name, cn->cn_namelen) == 0)){ 288 slot = i; 289 break; 290 } 291 } 292 if (i == dir->db_slots) 293 return (ENOENT); 294 295 KASSERT(slot < offset && offset < EFS_DIRBLK_SPACE_SIZE); 296 de = (struct efs_dirent *)((char *)dir + offset); 297 *inode = be32toh(de->de_inumber); 298 299 return (0); 300 } 301 302 /* 303 * Given an extent descriptor that represents a directory, look up 304 * componentname within its efs_dirblk's. If it is found, return the 305 * corresponding inode in 'ino'. 306 * 307 * Returns 0 on success. 308 */ 309 static int 310 efs_extent_lookup(struct efs_mount *emp, struct efs_extent *ex, 311 struct componentname *cn, ino_t *ino) 312 { 313 struct efs_dirblk *db; 314 struct buf *bp; 315 int i, err; 316 317 /* 318 * Read in each of the dirblks until we find our entry. 319 * If we don't, return ENOENT. 320 */ 321 for (i = 0; i < ex->ex_length; i++) { 322 err = efs_bread(emp, ex->ex_bn + i, NULL, &bp); 323 if (err) { 324 printf("efs: warning: invalid extent descriptor\n"); 325 brelse(bp, 0); 326 return (err); 327 } 328 329 db = (struct efs_dirblk *)bp->b_data; 330 if (efs_dirblk_lookup(db, cn, ino) == 0) { 331 brelse(bp, 0); 332 return (0); 333 } 334 brelse(bp, 0); 335 } 336 337 return (ENOENT); 338 } 339 340 /* 341 * Given the provided in-core inode, look up the pathname requested. If 342 * we find it, 'ino' reflects its corresponding on-disk inode number. 343 * 344 * Returns 0 on success. 345 */ 346 int 347 efs_inode_lookup(struct efs_mount *emp, struct efs_inode *ei, 348 struct componentname *cn, ino_t *ino) 349 { 350 struct efs_extent ex; 351 struct efs_extent_iterator exi; 352 int ret; 353 354 KASSERT(VOP_ISLOCKED(ei->ei_vp)); 355 KASSERT(efs_is_inode_synced(ei) == 0); 356 KASSERT((ei->ei_mode & S_IFMT) == S_IFDIR); 357 358 efs_extent_iterator_init(&exi, ei, 0); 359 while ((ret = efs_extent_iterator_next(&exi, &ex)) == 0) { 360 if (efs_extent_lookup(emp, &ex, cn, ino) == 0) { 361 return (0); 362 } 363 } 364 365 return ((ret == -1) ? ENOENT : ret); 366 } 367 368 /* 369 * Convert on-disk extent structure to in-core format. 370 */ 371 void 372 efs_dextent_to_extent(struct efs_dextent *dex, struct efs_extent *ex) 373 { 374 375 KASSERT(dex != NULL && ex != NULL); 376 377 ex->ex_magic = dex->ex_bytes[0]; 378 ex->ex_bn = be32toh(dex->ex_words[0]) & 0x00ffffff; 379 ex->ex_length = dex->ex_bytes[4]; 380 ex->ex_offset = be32toh(dex->ex_words[1]) & 0x00ffffff; 381 } 382 383 /* 384 * Convert in-core extent format to on-disk structure. 385 */ 386 void 387 efs_extent_to_dextent(struct efs_extent *ex, struct efs_dextent *dex) 388 { 389 390 KASSERT(ex != NULL && dex != NULL); 391 KASSERT(ex->ex_magic == EFS_EXTENT_MAGIC); 392 KASSERT((ex->ex_bn & ~EFS_EXTENT_BN_MASK) == 0); 393 KASSERT((ex->ex_offset & ~EFS_EXTENT_OFFSET_MASK) == 0); 394 395 dex->ex_words[0] = htobe32(ex->ex_bn); 396 dex->ex_bytes[0] = ex->ex_magic; 397 dex->ex_words[1] = htobe32(ex->ex_offset); 398 dex->ex_bytes[4] = ex->ex_length; 399 } 400 401 /* 402 * Initialise an extent iterator. 403 * 404 * If start_hint is non-0, attempt to set up the iterator beginning with the 405 * extent descriptor in which the start_hint'th byte exists. Callers must not 406 * expect success (this is simply an optimisation), so we reserve the right 407 * to start from the beginning. 408 */ 409 void 410 efs_extent_iterator_init(struct efs_extent_iterator *exi, struct efs_inode *eip, 411 off_t start_hint) 412 { 413 struct efs_extent ex, ex2; 414 struct buf *bp; 415 struct efs_mount *emp = VFSTOEFS(eip->ei_vp->v_mount); 416 off_t offset, length, next; 417 int i, err, numextents, numinextents; 418 int hi, lo, mid; 419 int indir; 420 421 exi->exi_eip = eip; 422 exi->exi_next = 0; 423 exi->exi_dnext = 0; 424 exi->exi_innext = 0; 425 426 if (start_hint == 0) 427 return; 428 429 /* force iterator to end if hint is too big */ 430 if (start_hint >= eip->ei_size) { 431 exi->exi_next = eip->ei_numextents; 432 return; 433 } 434 435 /* 436 * Use start_hint to jump to the right extent descriptor. We'll 437 * iterate over the 12 indirect extents because it's cheap, then 438 * bring the appropriate vector into core and binary search it. 439 */ 440 441 /* 442 * Handle the small file case separately first... 443 */ 444 if (eip->ei_numextents <= EFS_DIRECTEXTENTS) { 445 for (i = 0; i < eip->ei_numextents; i++) { 446 efs_dextent_to_extent(&eip->ei_di.di_extents[i], &ex); 447 448 offset = ex.ex_offset * EFS_BB_SIZE; 449 length = ex.ex_length * EFS_BB_SIZE; 450 451 if (start_hint >= offset && 452 start_hint < (offset + length)) { 453 exi->exi_next = exi->exi_dnext = i; 454 return; 455 } 456 } 457 458 /* shouldn't get here, no? */ 459 EFS_DPRINTF(("efs_extent_iterator_init: bad direct extents\n")); 460 return; 461 } 462 463 /* 464 * Now do the large files with indirect extents... 465 * 466 * The first indirect extent's ex_offset field contains the 467 * number of indirect extents used. 468 */ 469 efs_dextent_to_extent(&eip->ei_di.di_extents[0], &ex); 470 471 numinextents = ex.ex_offset; 472 if (numinextents < 1 || numinextents >= EFS_DIRECTEXTENTS) { 473 EFS_DPRINTF(("efs_extent_iterator_init: bad ex.ex_offset\n")); 474 return; 475 } 476 477 next = 0; 478 indir = -1; 479 numextents = 0; 480 for (i = 0; i < numinextents; i++) { 481 efs_dextent_to_extent(&eip->ei_di.di_extents[i], &ex); 482 483 err = efs_bread(emp, ex.ex_bn, NULL, &bp); 484 if (err) { 485 brelse(bp, 0); 486 return; 487 } 488 489 efs_dextent_to_extent((struct efs_dextent *)bp->b_data, &ex2); 490 brelse(bp, 0); 491 492 offset = ex2.ex_offset * EFS_BB_SIZE; 493 494 if (offset > start_hint) { 495 indir = MAX(0, i - 1); 496 break; 497 } 498 499 /* number of extents prior to this indirect vector of extents */ 500 next += numextents; 501 502 /* number of extents within this indirect vector of extents */ 503 numextents = ex.ex_length * EFS_EXTENTS_PER_BB; 504 numextents = MIN(numextents, eip->ei_numextents - next); 505 } 506 507 /* 508 * We hit the end, so assume it's in the last extent. 509 */ 510 if (indir == -1) 511 indir = numinextents - 1; 512 513 /* 514 * Binary search to find our desired direct extent. 515 */ 516 lo = 0; 517 mid = 0; 518 hi = numextents - 1; 519 efs_dextent_to_extent(&eip->ei_di.di_extents[indir], &ex); 520 while (lo <= hi) { 521 int bboff; 522 int index; 523 524 mid = (lo + hi) / 2; 525 526 bboff = mid / EFS_EXTENTS_PER_BB; 527 index = mid % EFS_EXTENTS_PER_BB; 528 529 err = efs_bread(emp, ex.ex_bn + bboff, NULL, &bp); 530 if (err) { 531 brelse(bp, 0); 532 EFS_DPRINTF(("efs_extent_iterator_init: bsrch read\n")); 533 return; 534 } 535 536 efs_dextent_to_extent((struct efs_dextent *)bp->b_data + index, 537 &ex2); 538 brelse(bp, 0); 539 540 offset = ex2.ex_offset * EFS_BB_SIZE; 541 length = ex2.ex_length * EFS_BB_SIZE; 542 543 if (start_hint >= offset && start_hint < (offset + length)) 544 break; 545 546 if (start_hint < offset) 547 hi = mid - 1; 548 else 549 lo = mid + 1; 550 } 551 552 /* 553 * This is bad. Either the hint is bogus (which shouldn't 554 * happen) or the extent list must be screwed up. We 555 * have to abort. 556 */ 557 if (lo > hi) { 558 EFS_DPRINTF(("efs_extent_iterator_init: bsearch " 559 "failed to find extent\n")); 560 return; 561 } 562 563 exi->exi_next = next + mid; 564 exi->exi_dnext = indir; 565 exi->exi_innext = mid; 566 } 567 568 /* 569 * Return the next EFS extent. 570 * 571 * Returns 0 if another extent was iterated, -1 if we've exhausted all 572 * extents, or an error number. If 'exi' is non-NULL, the next extent is 573 * written to it (should it exist). 574 */ 575 int 576 efs_extent_iterator_next(struct efs_extent_iterator *exi, 577 struct efs_extent *exp) 578 { 579 struct efs_extent ex; 580 struct efs_dextent *dexp; 581 struct efs_inode *eip = exi->exi_eip; 582 struct buf *bp; 583 int err, bboff, index; 584 585 if (exi->exi_next++ >= eip->ei_numextents) 586 return (-1); 587 588 /* direct or indirect extents? */ 589 if (eip->ei_numextents <= EFS_DIRECTEXTENTS) { 590 if (exp != NULL) { 591 dexp = &eip->ei_di.di_extents[exi->exi_dnext++]; 592 efs_dextent_to_extent(dexp, exp); 593 } 594 } else { 595 efs_dextent_to_extent( 596 &eip->ei_di.di_extents[exi->exi_dnext], &ex); 597 598 bboff = exi->exi_innext / EFS_EXTENTS_PER_BB; 599 index = exi->exi_innext % EFS_EXTENTS_PER_BB; 600 601 err = efs_bread(VFSTOEFS(eip->ei_vp->v_mount), 602 ex.ex_bn + bboff, NULL, &bp); 603 if (err) { 604 EFS_DPRINTF(("efs_extent_iterator_next: " 605 "efs_bread failed: %d\n", err)); 606 brelse(bp, 0); 607 return (err); 608 } 609 610 if (exp != NULL) { 611 dexp = (struct efs_dextent *)bp->b_data + index; 612 efs_dextent_to_extent(dexp, exp); 613 } 614 brelse(bp, 0); 615 616 bboff = exi->exi_innext++ / EFS_EXTENTS_PER_BB; 617 if (bboff >= ex.ex_length) { 618 exi->exi_innext = 0; 619 exi->exi_dnext++; 620 } 621 } 622 623 return (0); 624 } 625