1 /* $NetBSD: efs_subr.c,v 1.12 2015/09/26 12:16:28 maxv Exp $ */ 2 3 /* 4 * Copyright (c) 2006 Stephen M. Rumble <rumble@ephemeral.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/cdefs.h> 20 __KERNEL_RCSID(0, "$NetBSD: efs_subr.c,v 1.12 2015/09/26 12:16:28 maxv Exp $"); 21 22 #include <sys/param.h> 23 #include <sys/kauth.h> 24 #include <sys/lwp.h> 25 #include <sys/proc.h> 26 #include <sys/buf.h> 27 #include <sys/mount.h> 28 #include <sys/vnode.h> 29 #include <sys/namei.h> 30 #include <sys/stat.h> 31 #include <sys/malloc.h> 32 33 #include <miscfs/genfs/genfs_node.h> 34 35 #include <fs/efs/efs.h> 36 #include <fs/efs/efs_sb.h> 37 #include <fs/efs/efs_dir.h> 38 #include <fs/efs/efs_genfs.h> 39 #include <fs/efs/efs_mount.h> 40 #include <fs/efs/efs_extent.h> 41 #include <fs/efs/efs_dinode.h> 42 #include <fs/efs/efs_inode.h> 43 #include <fs/efs/efs_subr.h> 44 45 struct pool efs_inode_pool; 46 47 /* 48 * Calculate a checksum for the provided superblock in __host byte order__. 49 * 50 * At some point SGI changed the checksum algorithm slightly, which can be 51 * enabled with the 'new' flag. 52 * 53 * Presumably this change occured on or before 24 Oct 1988 (around IRIX 3.1), 54 * so we're pretty unlikely to ever actually see an old checksum. Further, it 55 * means that EFS_NEWMAGIC filesystems (IRIX >= 3.3) must match the new 56 * checksum whereas EFS_MAGIC filesystems could potentially use either 57 * algorithm. 58 * 59 * See comp.sys.sgi <1991Aug9.050838.16876@odin.corp.sgi.com> 60 */ 61 int32_t 62 efs_sb_checksum(struct efs_sb *esb, int new) 63 { 64 int i; 65 int32_t cksum; 66 uint16_t *sbarray = (uint16_t *)esb; 67 68 KASSERT((EFS_SB_CHECKSUM_SIZE % 2) == 0); 69 70 for (i = cksum = 0; i < (EFS_SB_CHECKSUM_SIZE / 2); i++) { 71 cksum ^= be16toh(sbarray[i]); 72 cksum = (cksum << 1) | (new && cksum < 0); 73 } 74 75 return (cksum); 76 } 77 78 /* 79 * Determine if the superblock is valid. 80 * 81 * Returns 0 if valid, else invalid. If invalid, 'why' is set to an 82 * explanation. 83 */ 84 int 85 efs_sb_validate(struct efs_sb *esb, const char **why) 86 { 87 uint32_t ocksum, ncksum; 88 89 *why = NULL; 90 91 if (be32toh(esb->sb_magic) != EFS_SB_MAGIC && 92 be32toh(esb->sb_magic) != EFS_SB_NEWMAGIC) { 93 *why = "sb_magic invalid"; 94 return (1); 95 } 96 97 ocksum = htobe32(efs_sb_checksum(esb, 0)); 98 ncksum = htobe32(efs_sb_checksum(esb, 1)); 99 if (esb->sb_checksum != ocksum && esb->sb_checksum != ncksum) { 100 *why = "sb_checksum invalid"; 101 return (1); 102 } 103 104 if (be32toh(esb->sb_size) > EFS_SIZE_MAX) { 105 *why = "sb_size > EFS_SIZE_MAX"; 106 return (1); 107 } 108 109 if (be32toh(esb->sb_firstcg) <= EFS_BB_BITMAP) { 110 *why = "sb_firstcg <= EFS_BB_BITMAP"; 111 return (1); 112 } 113 114 /* XXX - add better sb consistency checks here */ 115 if (esb->sb_cgfsize == 0 || 116 esb->sb_cgisize == 0 || 117 esb->sb_ncg == 0 || 118 esb->sb_bmsize == 0) { 119 *why = "something bad happened"; 120 return (1); 121 } 122 123 return (0); 124 } 125 126 /* 127 * Determine the basic block offset and inode index within that block, given 128 * the inode 'ino' and filesystem parameters _in host byte order_. The inode 129 * will live at byte address 'bboff' * EFS_BB_SIZE + 'index' * EFS_DINODE_SIZE. 130 */ 131 void 132 efs_locate_inode(ino_t ino, struct efs_sb *sbp, uint32_t *bboff, int *index) 133 { 134 uint32_t cgfsize, firstcg; 135 uint16_t cgisize; 136 137 cgisize = be16toh(sbp->sb_cgisize); 138 cgfsize = be32toh(sbp->sb_cgfsize); 139 firstcg = be32toh(sbp->sb_firstcg); 140 141 *bboff = firstcg + ((ino / (cgisize * EFS_DINODES_PER_BB)) * cgfsize) + 142 ((ino % (cgisize * EFS_DINODES_PER_BB)) / EFS_DINODES_PER_BB); 143 *index = ino & (EFS_DINODES_PER_BB - 1); 144 } 145 146 /* 147 * Read in an inode from disk. 148 * 149 * We actually take in four inodes at a time. Hopefully these will stick 150 * around in the buffer cache and get used without going to disk. 151 * 152 * Returns 0 on success. 153 */ 154 int 155 efs_read_inode(struct efs_mount *emp, ino_t ino, struct lwp *l, 156 struct efs_dinode *di) 157 { 158 struct efs_sb *sbp; 159 struct buf *bp; 160 int index, err; 161 uint32_t bboff; 162 163 sbp = &emp->em_sb; 164 efs_locate_inode(ino, sbp, &bboff, &index); 165 166 err = efs_bread(emp, bboff, l, &bp); 167 if (err) { 168 return (err); 169 } 170 memcpy(di, ((struct efs_dinode *)bp->b_data) + index, sizeof(*di)); 171 brelse(bp, 0); 172 173 return (0); 174 } 175 176 /* 177 * Perform a read from our device handling the potential DEV_BSIZE 178 * messiness (although as of 19.2.2006, all ports appear to use 512) as 179 * we as EFS block sizing. 180 * 181 * bboff: basic block offset 182 * 183 * Returns 0 on success. 184 */ 185 int 186 efs_bread(struct efs_mount *emp, uint32_t bboff, struct lwp *l, struct buf **bp) 187 { 188 KASSERT(bboff < EFS_SIZE_MAX); 189 190 return (bread(emp->em_devvp, (daddr_t)bboff * (EFS_BB_SIZE / DEV_BSIZE), 191 EFS_BB_SIZE, 0, bp)); 192 } 193 194 /* 195 * Synchronise the in-core, host ordered and typed inode fields with their 196 * corresponding on-disk, EFS ordered and typed copies. 197 * 198 * This is the inverse of efs_dinode_sync_inode(), and should be called when 199 * an inode is loaded from disk. 200 */ 201 void 202 efs_sync_dinode_to_inode(struct efs_inode *ei) 203 { 204 205 ei->ei_mode = be16toh(ei->ei_di.di_mode); /*same as nbsd*/ 206 ei->ei_nlink = be16toh(ei->ei_di.di_nlink); 207 ei->ei_uid = be16toh(ei->ei_di.di_uid); 208 ei->ei_gid = be16toh(ei->ei_di.di_gid); 209 ei->ei_size = be32toh(ei->ei_di.di_size); 210 ei->ei_atime = be32toh(ei->ei_di.di_atime); 211 ei->ei_mtime = be32toh(ei->ei_di.di_mtime); 212 ei->ei_ctime = be32toh(ei->ei_di.di_ctime); 213 ei->ei_gen = be32toh(ei->ei_di.di_gen); 214 ei->ei_numextents = be16toh(ei->ei_di.di_numextents); 215 ei->ei_version = ei->ei_di.di_version; 216 } 217 218 /* 219 * Synchronise the on-disk, EFS ordered and typed inode fields with their 220 * corresponding in-core, host ordered and typed copies. 221 * 222 * This is the inverse of efs_inode_sync_dinode(), and should be called before 223 * an inode is flushed to disk. 224 */ 225 void 226 efs_sync_inode_to_dinode(struct efs_inode *ei) 227 { 228 229 panic("readonly -- no need to call me"); 230 } 231 232 #ifdef DIAGNOSTIC 233 /* 234 * Ensure that the in-core inode's host cached fields match its on-disk copy. 235 * 236 * Returns 0 if they match. 237 */ 238 static int 239 efs_is_inode_synced(struct efs_inode *ei) 240 { 241 int s; 242 243 s = 0; 244 /* XXX -- see above remarks about assumption */ 245 s += (ei->ei_mode != be16toh(ei->ei_di.di_mode)); 246 s += (ei->ei_nlink != be16toh(ei->ei_di.di_nlink)); 247 s += (ei->ei_uid != be16toh(ei->ei_di.di_uid)); 248 s += (ei->ei_gid != be16toh(ei->ei_di.di_gid)); 249 s += (ei->ei_size != be32toh(ei->ei_di.di_size)); 250 s += (ei->ei_atime != be32toh(ei->ei_di.di_atime)); 251 s += (ei->ei_mtime != be32toh(ei->ei_di.di_mtime)); 252 s += (ei->ei_ctime != be32toh(ei->ei_di.di_ctime)); 253 s += (ei->ei_gen != be32toh(ei->ei_di.di_gen)); 254 s += (ei->ei_numextents != be16toh(ei->ei_di.di_numextents)); 255 s += (ei->ei_version != ei->ei_di.di_version); 256 257 return (s); 258 } 259 #endif 260 261 /* 262 * Given an efs_dirblk structure and a componentname to search for, return the 263 * corresponding inode if it is found. 264 * 265 * Returns 0 on success. 266 */ 267 static int 268 efs_dirblk_lookup(struct efs_dirblk *dir, struct componentname *cn, 269 ino_t *inode) 270 { 271 struct efs_dirent *de; 272 int i, slot __diagused, offset; 273 274 KASSERT(cn->cn_namelen <= EFS_DIRENT_NAMELEN_MAX); 275 276 slot = offset = 0; 277 278 for (i = 0; i < dir->db_slots; i++) { 279 offset = EFS_DIRENT_OFF_EXPND(dir->db_space[i]); 280 281 if (offset == EFS_DIRBLK_SLOT_FREE) 282 continue; 283 284 de = (struct efs_dirent *)((char *)dir + offset); 285 if (de->de_namelen == cn->cn_namelen && 286 (strncmp(cn->cn_nameptr, de->de_name, cn->cn_namelen) == 0)){ 287 slot = i; 288 break; 289 } 290 } 291 if (i == dir->db_slots) 292 return (ENOENT); 293 294 KASSERT(slot < offset && offset < EFS_DIRBLK_SPACE_SIZE); 295 de = (struct efs_dirent *)((char *)dir + offset); 296 *inode = be32toh(de->de_inumber); 297 298 return (0); 299 } 300 301 /* 302 * Given an extent descriptor that represents a directory, look up 303 * componentname within its efs_dirblk's. If it is found, return the 304 * corresponding inode in 'ino'. 305 * 306 * Returns 0 on success. 307 */ 308 static int 309 efs_extent_lookup(struct efs_mount *emp, struct efs_extent *ex, 310 struct componentname *cn, ino_t *ino) 311 { 312 struct efs_dirblk *db; 313 struct buf *bp; 314 int i, err; 315 316 /* 317 * Read in each of the dirblks until we find our entry. 318 * If we don't, return ENOENT. 319 */ 320 for (i = 0; i < ex->ex_length; i++) { 321 err = efs_bread(emp, ex->ex_bn + i, NULL, &bp); 322 if (err) { 323 printf("efs: warning: invalid extent descriptor\n"); 324 return (err); 325 } 326 327 db = (struct efs_dirblk *)bp->b_data; 328 if (efs_dirblk_lookup(db, cn, ino) == 0) { 329 brelse(bp, 0); 330 return (0); 331 } 332 brelse(bp, 0); 333 } 334 335 return (ENOENT); 336 } 337 338 /* 339 * Given the provided in-core inode, look up the pathname requested. If 340 * we find it, 'ino' reflects its corresponding on-disk inode number. 341 * 342 * Returns 0 on success. 343 */ 344 int 345 efs_inode_lookup(struct efs_mount *emp, struct efs_inode *ei, 346 struct componentname *cn, ino_t *ino) 347 { 348 struct efs_extent ex; 349 struct efs_extent_iterator exi; 350 int ret; 351 352 KASSERT(VOP_ISLOCKED(ei->ei_vp)); 353 #ifdef DIAGNOSTIC 354 KASSERT(efs_is_inode_synced(ei) == 0); 355 #endif 356 KASSERT((ei->ei_mode & S_IFMT) == S_IFDIR); 357 358 efs_extent_iterator_init(&exi, ei, 0); 359 while ((ret = efs_extent_iterator_next(&exi, &ex)) == 0) { 360 if (efs_extent_lookup(emp, &ex, cn, ino) == 0) { 361 return (0); 362 } 363 } 364 365 return ((ret == -1) ? ENOENT : ret); 366 } 367 368 /* 369 * Convert on-disk extent structure to in-core format. 370 */ 371 void 372 efs_dextent_to_extent(struct efs_dextent *dex, struct efs_extent *ex) 373 { 374 375 KASSERT(dex != NULL && ex != NULL); 376 377 ex->ex_magic = dex->ex_bytes[0]; 378 ex->ex_bn = be32toh(dex->ex_words[0]) & 0x00ffffff; 379 ex->ex_length = dex->ex_bytes[4]; 380 ex->ex_offset = be32toh(dex->ex_words[1]) & 0x00ffffff; 381 } 382 383 /* 384 * Convert in-core extent format to on-disk structure. 385 */ 386 void 387 efs_extent_to_dextent(struct efs_extent *ex, struct efs_dextent *dex) 388 { 389 390 KASSERT(ex != NULL && dex != NULL); 391 KASSERT(ex->ex_magic == EFS_EXTENT_MAGIC); 392 KASSERT((ex->ex_bn & ~EFS_EXTENT_BN_MASK) == 0); 393 KASSERT((ex->ex_offset & ~EFS_EXTENT_OFFSET_MASK) == 0); 394 395 dex->ex_words[0] = htobe32(ex->ex_bn); 396 dex->ex_bytes[0] = ex->ex_magic; 397 dex->ex_words[1] = htobe32(ex->ex_offset); 398 dex->ex_bytes[4] = ex->ex_length; 399 } 400 401 /* 402 * Initialise an extent iterator. 403 * 404 * If start_hint is non-0, attempt to set up the iterator beginning with the 405 * extent descriptor in which the start_hint'th byte exists. Callers must not 406 * expect success (this is simply an optimisation), so we reserve the right 407 * to start from the beginning. 408 */ 409 void 410 efs_extent_iterator_init(struct efs_extent_iterator *exi, struct efs_inode *eip, 411 off_t start_hint) 412 { 413 struct efs_extent ex, ex2; 414 struct buf *bp; 415 struct efs_mount *emp = VFSTOEFS(eip->ei_vp->v_mount); 416 off_t offset, length, next; 417 int i, err, numextents, numinextents; 418 int hi, lo, mid; 419 int indir; 420 421 exi->exi_eip = eip; 422 exi->exi_next = 0; 423 exi->exi_dnext = 0; 424 exi->exi_innext = 0; 425 426 if (start_hint == 0) 427 return; 428 429 /* force iterator to end if hint is too big */ 430 if (start_hint >= eip->ei_size) { 431 exi->exi_next = eip->ei_numextents; 432 return; 433 } 434 435 /* 436 * Use start_hint to jump to the right extent descriptor. We'll 437 * iterate over the 12 indirect extents because it's cheap, then 438 * bring the appropriate vector into core and binary search it. 439 */ 440 441 /* 442 * Handle the small file case separately first... 443 */ 444 if (eip->ei_numextents <= EFS_DIRECTEXTENTS) { 445 for (i = 0; i < eip->ei_numextents; i++) { 446 efs_dextent_to_extent(&eip->ei_di.di_extents[i], &ex); 447 448 offset = ex.ex_offset * EFS_BB_SIZE; 449 length = ex.ex_length * EFS_BB_SIZE; 450 451 if (start_hint >= offset && 452 start_hint < (offset + length)) { 453 exi->exi_next = exi->exi_dnext = i; 454 return; 455 } 456 } 457 458 /* shouldn't get here, no? */ 459 EFS_DPRINTF(("efs_extent_iterator_init: bad direct extents\n")); 460 return; 461 } 462 463 /* 464 * Now do the large files with indirect extents... 465 * 466 * The first indirect extent's ex_offset field contains the 467 * number of indirect extents used. 468 */ 469 efs_dextent_to_extent(&eip->ei_di.di_extents[0], &ex); 470 471 numinextents = ex.ex_offset; 472 if (numinextents < 1 || numinextents >= EFS_DIRECTEXTENTS) { 473 EFS_DPRINTF(("efs_extent_iterator_init: bad ex.ex_offset\n")); 474 return; 475 } 476 477 next = 0; 478 indir = -1; 479 numextents = 0; 480 for (i = 0; i < numinextents; i++) { 481 efs_dextent_to_extent(&eip->ei_di.di_extents[i], &ex); 482 483 err = efs_bread(emp, ex.ex_bn, NULL, &bp); 484 if (err) { 485 return; 486 } 487 488 efs_dextent_to_extent((struct efs_dextent *)bp->b_data, &ex2); 489 brelse(bp, 0); 490 491 offset = ex2.ex_offset * EFS_BB_SIZE; 492 493 if (offset > start_hint) { 494 indir = MAX(0, i - 1); 495 break; 496 } 497 498 /* number of extents prior to this indirect vector of extents */ 499 next += numextents; 500 501 /* number of extents within this indirect vector of extents */ 502 numextents = ex.ex_length * EFS_EXTENTS_PER_BB; 503 numextents = MIN(numextents, eip->ei_numextents - next); 504 } 505 506 /* 507 * We hit the end, so assume it's in the last extent. 508 */ 509 if (indir == -1) 510 indir = numinextents - 1; 511 512 /* 513 * Binary search to find our desired direct extent. 514 */ 515 lo = 0; 516 mid = 0; 517 hi = numextents - 1; 518 efs_dextent_to_extent(&eip->ei_di.di_extents[indir], &ex); 519 while (lo <= hi) { 520 int bboff; 521 int index; 522 523 mid = (lo + hi) / 2; 524 525 bboff = mid / EFS_EXTENTS_PER_BB; 526 index = mid % EFS_EXTENTS_PER_BB; 527 528 err = efs_bread(emp, ex.ex_bn + bboff, NULL, &bp); 529 if (err) { 530 EFS_DPRINTF(("efs_extent_iterator_init: bsrch read\n")); 531 return; 532 } 533 534 efs_dextent_to_extent((struct efs_dextent *)bp->b_data + index, 535 &ex2); 536 brelse(bp, 0); 537 538 offset = ex2.ex_offset * EFS_BB_SIZE; 539 length = ex2.ex_length * EFS_BB_SIZE; 540 541 if (start_hint >= offset && start_hint < (offset + length)) 542 break; 543 544 if (start_hint < offset) 545 hi = mid - 1; 546 else 547 lo = mid + 1; 548 } 549 550 /* 551 * This is bad. Either the hint is bogus (which shouldn't 552 * happen) or the extent list must be screwed up. We 553 * have to abort. 554 */ 555 if (lo > hi) { 556 EFS_DPRINTF(("efs_extent_iterator_init: bsearch " 557 "failed to find extent\n")); 558 return; 559 } 560 561 exi->exi_next = next + mid; 562 exi->exi_dnext = indir; 563 exi->exi_innext = mid; 564 } 565 566 /* 567 * Return the next EFS extent. 568 * 569 * Returns 0 if another extent was iterated, -1 if we've exhausted all 570 * extents, or an error number. If 'exi' is non-NULL, the next extent is 571 * written to it (should it exist). 572 */ 573 int 574 efs_extent_iterator_next(struct efs_extent_iterator *exi, 575 struct efs_extent *exp) 576 { 577 struct efs_extent ex; 578 struct efs_dextent *dexp; 579 struct efs_inode *eip = exi->exi_eip; 580 struct buf *bp; 581 int err, bboff, index; 582 583 if (exi->exi_next++ >= eip->ei_numextents) 584 return (-1); 585 586 /* direct or indirect extents? */ 587 if (eip->ei_numextents <= EFS_DIRECTEXTENTS) { 588 if (exp != NULL) { 589 dexp = &eip->ei_di.di_extents[exi->exi_dnext++]; 590 efs_dextent_to_extent(dexp, exp); 591 } 592 } else { 593 efs_dextent_to_extent( 594 &eip->ei_di.di_extents[exi->exi_dnext], &ex); 595 596 bboff = exi->exi_innext / EFS_EXTENTS_PER_BB; 597 index = exi->exi_innext % EFS_EXTENTS_PER_BB; 598 599 err = efs_bread(VFSTOEFS(eip->ei_vp->v_mount), 600 ex.ex_bn + bboff, NULL, &bp); 601 if (err) { 602 EFS_DPRINTF(("efs_extent_iterator_next: " 603 "efs_bread failed: %d\n", err)); 604 return (err); 605 } 606 607 if (exp != NULL) { 608 dexp = (struct efs_dextent *)bp->b_data + index; 609 efs_dextent_to_extent(dexp, exp); 610 } 611 brelse(bp, 0); 612 613 bboff = exi->exi_innext++ / EFS_EXTENTS_PER_BB; 614 if (bboff >= ex.ex_length) { 615 exi->exi_innext = 0; 616 exi->exi_dnext++; 617 } 618 } 619 620 return (0); 621 } 622