1 /* $NetBSD: efs_subr.c,v 1.14 2021/12/10 20:36:04 andvar Exp $ */ 2 3 /* 4 * Copyright (c) 2006 Stephen M. Rumble <rumble@ephemeral.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/cdefs.h> 20 __KERNEL_RCSID(0, "$NetBSD: efs_subr.c,v 1.14 2021/12/10 20:36:04 andvar Exp $"); 21 22 #include <sys/param.h> 23 #include <sys/kauth.h> 24 #include <sys/lwp.h> 25 #include <sys/proc.h> 26 #include <sys/buf.h> 27 #include <sys/mount.h> 28 #include <sys/vnode.h> 29 #include <sys/namei.h> 30 #include <sys/stat.h> 31 #include <sys/malloc.h> 32 33 #include <miscfs/genfs/genfs_node.h> 34 35 #include <fs/efs/efs.h> 36 #include <fs/efs/efs_sb.h> 37 #include <fs/efs/efs_dir.h> 38 #include <fs/efs/efs_genfs.h> 39 #include <fs/efs/efs_mount.h> 40 #include <fs/efs/efs_extent.h> 41 #include <fs/efs/efs_dinode.h> 42 #include <fs/efs/efs_inode.h> 43 #include <fs/efs/efs_subr.h> 44 45 struct pool efs_inode_pool; 46 47 /* 48 * Calculate a checksum for the provided superblock in __host byte order__. 49 * 50 * At some point SGI changed the checksum algorithm slightly, which can be 51 * enabled with the 'new' flag. 52 * 53 * Presumably this change occurred on or before 24 Oct 1988 (around IRIX 3.1), 54 * so we're pretty unlikely to ever actually see an old checksum. Further, it 55 * means that EFS_NEWMAGIC filesystems (IRIX >= 3.3) must match the new 56 * checksum whereas EFS_MAGIC filesystems could potentially use either 57 * algorithm. 58 * 59 * See comp.sys.sgi <1991Aug9.050838.16876@odin.corp.sgi.com> 60 */ 61 int32_t 62 efs_sb_checksum(struct efs_sb *esb, int new) 63 { 64 int i; 65 int32_t cksum; 66 uint8_t *sbarray = (uint8_t *)esb; 67 68 KASSERT((EFS_SB_CHECKSUM_SIZE % 2) == 0); 69 70 for (i = cksum = 0; i < EFS_SB_CHECKSUM_SIZE; i += 2) { 71 uint16_t v; 72 memcpy(&v, &sbarray[i], sizeof(v)); 73 cksum ^= be16toh(v); 74 cksum = (cksum << 1) | (new && cksum < 0); 75 } 76 77 return (cksum); 78 } 79 80 /* 81 * Determine if the superblock is valid. 82 * 83 * Returns 0 if valid, else invalid. If invalid, 'why' is set to an 84 * explanation. 85 */ 86 int 87 efs_sb_validate(struct efs_sb *esb, const char **why) 88 { 89 uint32_t ocksum, ncksum; 90 91 *why = NULL; 92 93 if (be32toh(esb->sb_magic) != EFS_SB_MAGIC && 94 be32toh(esb->sb_magic) != EFS_SB_NEWMAGIC) { 95 *why = "sb_magic invalid"; 96 return (1); 97 } 98 99 ocksum = htobe32(efs_sb_checksum(esb, 0)); 100 ncksum = htobe32(efs_sb_checksum(esb, 1)); 101 if (esb->sb_checksum != ocksum && esb->sb_checksum != ncksum) { 102 *why = "sb_checksum invalid"; 103 return (1); 104 } 105 106 if (be32toh(esb->sb_size) > EFS_SIZE_MAX) { 107 *why = "sb_size > EFS_SIZE_MAX"; 108 return (1); 109 } 110 111 if (be32toh(esb->sb_firstcg) <= EFS_BB_BITMAP) { 112 *why = "sb_firstcg <= EFS_BB_BITMAP"; 113 return (1); 114 } 115 116 /* XXX - add better sb consistency checks here */ 117 if (esb->sb_cgfsize == 0 || 118 esb->sb_cgisize == 0 || 119 esb->sb_ncg == 0 || 120 esb->sb_bmsize == 0) { 121 *why = "something bad happened"; 122 return (1); 123 } 124 125 return (0); 126 } 127 128 /* 129 * Determine the basic block offset and inode index within that block, given 130 * the inode 'ino' and filesystem parameters _in host byte order_. The inode 131 * will live at byte address 'bboff' * EFS_BB_SIZE + 'index' * EFS_DINODE_SIZE. 132 */ 133 void 134 efs_locate_inode(ino_t ino, struct efs_sb *sbp, uint32_t *bboff, int *index) 135 { 136 uint32_t cgfsize, firstcg; 137 uint16_t cgisize; 138 139 cgisize = be16toh(sbp->sb_cgisize); 140 cgfsize = be32toh(sbp->sb_cgfsize); 141 firstcg = be32toh(sbp->sb_firstcg); 142 143 *bboff = firstcg + ((ino / (cgisize * EFS_DINODES_PER_BB)) * cgfsize) + 144 ((ino % (cgisize * EFS_DINODES_PER_BB)) / EFS_DINODES_PER_BB); 145 *index = ino & (EFS_DINODES_PER_BB - 1); 146 } 147 148 /* 149 * Read in an inode from disk. 150 * 151 * We actually take in four inodes at a time. Hopefully these will stick 152 * around in the buffer cache and get used without going to disk. 153 * 154 * Returns 0 on success. 155 */ 156 int 157 efs_read_inode(struct efs_mount *emp, ino_t ino, struct lwp *l, 158 struct efs_dinode *di) 159 { 160 struct efs_sb *sbp; 161 struct buf *bp; 162 int index, err; 163 uint32_t bboff; 164 165 sbp = &emp->em_sb; 166 efs_locate_inode(ino, sbp, &bboff, &index); 167 168 err = efs_bread(emp, bboff, l, &bp); 169 if (err) { 170 return (err); 171 } 172 memcpy(di, ((struct efs_dinode *)bp->b_data) + index, sizeof(*di)); 173 brelse(bp, 0); 174 175 return (0); 176 } 177 178 /* 179 * Perform a read from our device handling the potential DEV_BSIZE 180 * messiness (although as of 19.2.2006, all ports appear to use 512) as 181 * we as EFS block sizing. 182 * 183 * bboff: basic block offset 184 * 185 * Returns 0 on success. 186 */ 187 int 188 efs_bread(struct efs_mount *emp, uint32_t bboff, struct lwp *l, struct buf **bp) 189 { 190 KASSERT(bboff < EFS_SIZE_MAX); 191 192 return (bread(emp->em_devvp, (daddr_t)bboff * (EFS_BB_SIZE / DEV_BSIZE), 193 EFS_BB_SIZE, 0, bp)); 194 } 195 196 /* 197 * Synchronise the in-core, host ordered and typed inode fields with their 198 * corresponding on-disk, EFS ordered and typed copies. 199 * 200 * This is the inverse of efs_dinode_sync_inode(), and should be called when 201 * an inode is loaded from disk. 202 */ 203 void 204 efs_sync_dinode_to_inode(struct efs_inode *ei) 205 { 206 207 ei->ei_mode = be16toh(ei->ei_di.di_mode); /*same as nbsd*/ 208 ei->ei_nlink = be16toh(ei->ei_di.di_nlink); 209 ei->ei_uid = be16toh(ei->ei_di.di_uid); 210 ei->ei_gid = be16toh(ei->ei_di.di_gid); 211 ei->ei_size = be32toh(ei->ei_di.di_size); 212 ei->ei_atime = be32toh(ei->ei_di.di_atime); 213 ei->ei_mtime = be32toh(ei->ei_di.di_mtime); 214 ei->ei_ctime = be32toh(ei->ei_di.di_ctime); 215 ei->ei_gen = be32toh(ei->ei_di.di_gen); 216 ei->ei_numextents = be16toh(ei->ei_di.di_numextents); 217 ei->ei_version = ei->ei_di.di_version; 218 } 219 220 /* 221 * Synchronise the on-disk, EFS ordered and typed inode fields with their 222 * corresponding in-core, host ordered and typed copies. 223 * 224 * This is the inverse of efs_inode_sync_dinode(), and should be called before 225 * an inode is flushed to disk. 226 */ 227 void 228 efs_sync_inode_to_dinode(struct efs_inode *ei) 229 { 230 231 panic("readonly -- no need to call me"); 232 } 233 234 #ifdef DIAGNOSTIC 235 /* 236 * Ensure that the in-core inode's host cached fields match its on-disk copy. 237 * 238 * Returns 0 if they match. 239 */ 240 static int 241 efs_is_inode_synced(struct efs_inode *ei) 242 { 243 int s; 244 245 s = 0; 246 /* XXX -- see above remarks about assumption */ 247 s += (ei->ei_mode != be16toh(ei->ei_di.di_mode)); 248 s += (ei->ei_nlink != be16toh(ei->ei_di.di_nlink)); 249 s += (ei->ei_uid != be16toh(ei->ei_di.di_uid)); 250 s += (ei->ei_gid != be16toh(ei->ei_di.di_gid)); 251 s += (ei->ei_size != be32toh(ei->ei_di.di_size)); 252 s += (ei->ei_atime != be32toh(ei->ei_di.di_atime)); 253 s += (ei->ei_mtime != be32toh(ei->ei_di.di_mtime)); 254 s += (ei->ei_ctime != be32toh(ei->ei_di.di_ctime)); 255 s += (ei->ei_gen != be32toh(ei->ei_di.di_gen)); 256 s += (ei->ei_numextents != be16toh(ei->ei_di.di_numextents)); 257 s += (ei->ei_version != ei->ei_di.di_version); 258 259 return (s); 260 } 261 #endif 262 263 /* 264 * Given an efs_dirblk structure and a componentname to search for, return the 265 * corresponding inode if it is found. 266 * 267 * Returns 0 on success. 268 */ 269 static int 270 efs_dirblk_lookup(struct efs_dirblk *dir, struct componentname *cn, 271 ino_t *inode) 272 { 273 struct efs_dirent *de; 274 int i, slot __diagused, offset; 275 276 KASSERT(cn->cn_namelen <= EFS_DIRENT_NAMELEN_MAX); 277 278 slot = offset = 0; 279 280 for (i = 0; i < dir->db_slots; i++) { 281 offset = EFS_DIRENT_OFF_EXPND(dir->db_space[i]); 282 283 if (offset == EFS_DIRBLK_SLOT_FREE) 284 continue; 285 286 de = (struct efs_dirent *)((char *)dir + offset); 287 if (de->de_namelen == cn->cn_namelen && 288 (strncmp(cn->cn_nameptr, de->de_name, cn->cn_namelen) == 0)){ 289 slot = i; 290 break; 291 } 292 } 293 if (i == dir->db_slots) 294 return (ENOENT); 295 296 KASSERT(slot < offset && offset < EFS_DIRBLK_SPACE_SIZE); 297 de = (struct efs_dirent *)((char *)dir + offset); 298 *inode = be32toh(de->de_inumber); 299 300 return (0); 301 } 302 303 /* 304 * Given an extent descriptor that represents a directory, look up 305 * componentname within its efs_dirblk's. If it is found, return the 306 * corresponding inode in 'ino'. 307 * 308 * Returns 0 on success. 309 */ 310 static int 311 efs_extent_lookup(struct efs_mount *emp, struct efs_extent *ex, 312 struct componentname *cn, ino_t *ino) 313 { 314 struct efs_dirblk *db; 315 struct buf *bp; 316 int i, err; 317 318 /* 319 * Read in each of the dirblks until we find our entry. 320 * If we don't, return ENOENT. 321 */ 322 for (i = 0; i < ex->ex_length; i++) { 323 err = efs_bread(emp, ex->ex_bn + i, NULL, &bp); 324 if (err) { 325 printf("efs: warning: invalid extent descriptor\n"); 326 return (err); 327 } 328 329 db = (struct efs_dirblk *)bp->b_data; 330 if (efs_dirblk_lookup(db, cn, ino) == 0) { 331 brelse(bp, 0); 332 return (0); 333 } 334 brelse(bp, 0); 335 } 336 337 return (ENOENT); 338 } 339 340 /* 341 * Given the provided in-core inode, look up the pathname requested. If 342 * we find it, 'ino' reflects its corresponding on-disk inode number. 343 * 344 * Returns 0 on success. 345 */ 346 int 347 efs_inode_lookup(struct efs_mount *emp, struct efs_inode *ei, 348 struct componentname *cn, ino_t *ino) 349 { 350 struct efs_extent ex; 351 struct efs_extent_iterator exi; 352 int ret; 353 354 KASSERT(VOP_ISLOCKED(ei->ei_vp)); 355 #ifdef DIAGNOSTIC 356 KASSERT(efs_is_inode_synced(ei) == 0); 357 #endif 358 KASSERT((ei->ei_mode & S_IFMT) == S_IFDIR); 359 360 efs_extent_iterator_init(&exi, ei, 0); 361 while ((ret = efs_extent_iterator_next(&exi, &ex)) == 0) { 362 if (efs_extent_lookup(emp, &ex, cn, ino) == 0) { 363 return (0); 364 } 365 } 366 367 return ((ret == -1) ? ENOENT : ret); 368 } 369 370 /* 371 * Convert on-disk extent structure to in-core format. 372 */ 373 void 374 efs_dextent_to_extent(struct efs_dextent *dex, struct efs_extent *ex) 375 { 376 377 KASSERT(dex != NULL && ex != NULL); 378 379 ex->ex_magic = dex->ex_bytes[0]; 380 ex->ex_bn = be32toh(dex->ex_words[0]) & 0x00ffffff; 381 ex->ex_length = dex->ex_bytes[4]; 382 ex->ex_offset = be32toh(dex->ex_words[1]) & 0x00ffffff; 383 } 384 385 /* 386 * Convert in-core extent format to on-disk structure. 387 */ 388 void 389 efs_extent_to_dextent(struct efs_extent *ex, struct efs_dextent *dex) 390 { 391 392 KASSERT(ex != NULL && dex != NULL); 393 KASSERT(ex->ex_magic == EFS_EXTENT_MAGIC); 394 KASSERT((ex->ex_bn & ~EFS_EXTENT_BN_MASK) == 0); 395 KASSERT((ex->ex_offset & ~EFS_EXTENT_OFFSET_MASK) == 0); 396 397 dex->ex_words[0] = htobe32(ex->ex_bn); 398 dex->ex_bytes[0] = ex->ex_magic; 399 dex->ex_words[1] = htobe32(ex->ex_offset); 400 dex->ex_bytes[4] = ex->ex_length; 401 } 402 403 /* 404 * Initialise an extent iterator. 405 * 406 * If start_hint is non-0, attempt to set up the iterator beginning with the 407 * extent descriptor in which the start_hint'th byte exists. Callers must not 408 * expect success (this is simply an optimisation), so we reserve the right 409 * to start from the beginning. 410 */ 411 void 412 efs_extent_iterator_init(struct efs_extent_iterator *exi, struct efs_inode *eip, 413 off_t start_hint) 414 { 415 struct efs_extent ex, ex2; 416 struct buf *bp; 417 struct efs_mount *emp = VFSTOEFS(eip->ei_vp->v_mount); 418 off_t offset, length, next; 419 int i, err, numextents, numinextents; 420 int hi, lo, mid; 421 int indir; 422 423 exi->exi_eip = eip; 424 exi->exi_next = 0; 425 exi->exi_dnext = 0; 426 exi->exi_innext = 0; 427 428 if (start_hint == 0) 429 return; 430 431 /* force iterator to end if hint is too big */ 432 if (start_hint >= eip->ei_size) { 433 exi->exi_next = eip->ei_numextents; 434 return; 435 } 436 437 /* 438 * Use start_hint to jump to the right extent descriptor. We'll 439 * iterate over the 12 indirect extents because it's cheap, then 440 * bring the appropriate vector into core and binary search it. 441 */ 442 443 /* 444 * Handle the small file case separately first... 445 */ 446 if (eip->ei_numextents <= EFS_DIRECTEXTENTS) { 447 for (i = 0; i < eip->ei_numextents; i++) { 448 efs_dextent_to_extent(&eip->ei_di.di_extents[i], &ex); 449 450 offset = ex.ex_offset * EFS_BB_SIZE; 451 length = ex.ex_length * EFS_BB_SIZE; 452 453 if (start_hint >= offset && 454 start_hint < (offset + length)) { 455 exi->exi_next = exi->exi_dnext = i; 456 return; 457 } 458 } 459 460 /* shouldn't get here, no? */ 461 EFS_DPRINTF(("efs_extent_iterator_init: bad direct extents\n")); 462 return; 463 } 464 465 /* 466 * Now do the large files with indirect extents... 467 * 468 * The first indirect extent's ex_offset field contains the 469 * number of indirect extents used. 470 */ 471 efs_dextent_to_extent(&eip->ei_di.di_extents[0], &ex); 472 473 numinextents = ex.ex_offset; 474 if (numinextents < 1 || numinextents >= EFS_DIRECTEXTENTS) { 475 EFS_DPRINTF(("efs_extent_iterator_init: bad ex.ex_offset\n")); 476 return; 477 } 478 479 next = 0; 480 indir = -1; 481 numextents = 0; 482 for (i = 0; i < numinextents; i++) { 483 efs_dextent_to_extent(&eip->ei_di.di_extents[i], &ex); 484 485 err = efs_bread(emp, ex.ex_bn, NULL, &bp); 486 if (err) { 487 return; 488 } 489 490 efs_dextent_to_extent((struct efs_dextent *)bp->b_data, &ex2); 491 brelse(bp, 0); 492 493 offset = ex2.ex_offset * EFS_BB_SIZE; 494 495 if (offset > start_hint) { 496 indir = MAX(0, i - 1); 497 break; 498 } 499 500 /* number of extents prior to this indirect vector of extents */ 501 next += numextents; 502 503 /* number of extents within this indirect vector of extents */ 504 numextents = ex.ex_length * EFS_EXTENTS_PER_BB; 505 numextents = MIN(numextents, eip->ei_numextents - next); 506 } 507 508 /* 509 * We hit the end, so assume it's in the last extent. 510 */ 511 if (indir == -1) 512 indir = numinextents - 1; 513 514 /* 515 * Binary search to find our desired direct extent. 516 */ 517 lo = 0; 518 mid = 0; 519 hi = numextents - 1; 520 efs_dextent_to_extent(&eip->ei_di.di_extents[indir], &ex); 521 while (lo <= hi) { 522 int bboff; 523 int index; 524 525 mid = (lo + hi) / 2; 526 527 bboff = mid / EFS_EXTENTS_PER_BB; 528 index = mid % EFS_EXTENTS_PER_BB; 529 530 err = efs_bread(emp, ex.ex_bn + bboff, NULL, &bp); 531 if (err) { 532 EFS_DPRINTF(("efs_extent_iterator_init: bsrch read\n")); 533 return; 534 } 535 536 efs_dextent_to_extent((struct efs_dextent *)bp->b_data + index, 537 &ex2); 538 brelse(bp, 0); 539 540 offset = ex2.ex_offset * EFS_BB_SIZE; 541 length = ex2.ex_length * EFS_BB_SIZE; 542 543 if (start_hint >= offset && start_hint < (offset + length)) 544 break; 545 546 if (start_hint < offset) 547 hi = mid - 1; 548 else 549 lo = mid + 1; 550 } 551 552 /* 553 * This is bad. Either the hint is bogus (which shouldn't 554 * happen) or the extent list must be screwed up. We 555 * have to abort. 556 */ 557 if (lo > hi) { 558 EFS_DPRINTF(("efs_extent_iterator_init: bsearch " 559 "failed to find extent\n")); 560 return; 561 } 562 563 exi->exi_next = next + mid; 564 exi->exi_dnext = indir; 565 exi->exi_innext = mid; 566 } 567 568 /* 569 * Return the next EFS extent. 570 * 571 * Returns 0 if another extent was iterated, -1 if we've exhausted all 572 * extents, or an error number. If 'exi' is non-NULL, the next extent is 573 * written to it (should it exist). 574 */ 575 int 576 efs_extent_iterator_next(struct efs_extent_iterator *exi, 577 struct efs_extent *exp) 578 { 579 struct efs_extent ex; 580 struct efs_dextent *dexp; 581 struct efs_inode *eip = exi->exi_eip; 582 struct buf *bp; 583 int err, bboff, index; 584 585 if (exi->exi_next++ >= eip->ei_numextents) 586 return (-1); 587 588 /* direct or indirect extents? */ 589 if (eip->ei_numextents <= EFS_DIRECTEXTENTS) { 590 if (exp != NULL) { 591 dexp = &eip->ei_di.di_extents[exi->exi_dnext++]; 592 efs_dextent_to_extent(dexp, exp); 593 } 594 } else { 595 efs_dextent_to_extent( 596 &eip->ei_di.di_extents[exi->exi_dnext], &ex); 597 598 bboff = exi->exi_innext / EFS_EXTENTS_PER_BB; 599 index = exi->exi_innext % EFS_EXTENTS_PER_BB; 600 601 err = efs_bread(VFSTOEFS(eip->ei_vp->v_mount), 602 ex.ex_bn + bboff, NULL, &bp); 603 if (err) { 604 EFS_DPRINTF(("efs_extent_iterator_next: " 605 "efs_bread failed: %d\n", err)); 606 return (err); 607 } 608 609 if (exp != NULL) { 610 dexp = (struct efs_dextent *)bp->b_data + index; 611 efs_dextent_to_extent(dexp, exp); 612 } 613 brelse(bp, 0); 614 615 bboff = exi->exi_innext++ / EFS_EXTENTS_PER_BB; 616 if (bboff >= ex.ex_length) { 617 exi->exi_innext = 0; 618 exi->exi_dnext++; 619 } 620 } 621 622 return (0); 623 } 624