151188Sbostic /* 251188Sbostic * Copyright (c) 1991 Regents of the University of California. 351188Sbostic * All rights reserved. 451188Sbostic * 551188Sbostic * %sccs.include.redist.c% 651188Sbostic * 7*51915Sbostic * @(#)lfs_segment.c 7.4 (Berkeley) 12/13/91 851188Sbostic */ 951188Sbostic 1051490Sbostic #include <sys/param.h> 1151490Sbostic #include <sys/systm.h> 1251490Sbostic #include <sys/namei.h> 1351490Sbostic #include <sys/resourcevar.h> 1451490Sbostic #include <sys/kernel.h> 1551490Sbostic #include <sys/file.h> 1651490Sbostic #include <sys/stat.h> 1751490Sbostic #include <sys/buf.h> 1851490Sbostic #include <sys/proc.h> 1951490Sbostic #include <sys/conf.h> 2051490Sbostic #include <sys/vnode.h> 2151490Sbostic #include <sys/specdev.h> 2251490Sbostic #include <sys/fifo.h> 2351490Sbostic #include <sys/malloc.h> 2451490Sbostic #include <sys/mount.h> 25*51915Sbostic #include <sys/kernel.h> /* XXX delete when time goes away */ 2651188Sbostic 2751499Sbostic #include <ufs/ufs/quota.h> 2851499Sbostic #include <ufs/ufs/inode.h> 2951499Sbostic #include <ufs/ufs/dir.h> 3051499Sbostic #include <ufs/ufs/ufsmount.h> 3151490Sbostic 3251499Sbostic #include <ufs/lfs/lfs.h> 3351499Sbostic #include <ufs/lfs/lfs_extern.h> 3451490Sbostic 3551860Sbostic /* In-memory description of a segment about to be written. */ 3651860Sbostic typedef struct segment SEGMENT; 3751860Sbostic struct segment { 3851860Sbostic BUF **bpp; /* pointer to buffer array */ 3951860Sbostic BUF **cbpp; /* pointer to next available bp */ 4051860Sbostic BUF *ibp; /* buffer pointer to inode page */ 4151860Sbostic void *segsum; /* segment summary info */ 4251860Sbostic u_long ninodes; /* number of inodes in this segment */ 4351860Sbostic u_long seg_bytes_left; /* bytes left in segment */ 4451860Sbostic u_long sum_bytes_left; /* bytes left in summary block */ 4551860Sbostic u_long seg_number; /* number of this segment */ 4651860Sbostic #define SEGM_CKP 0x01 /* doing a checkpoint */ 4751860Sbostic u_long seg_flags; /* run-time flags for this segment */ 4851860Sbostic FINFO *fip; /* current fileinfo pointer */ 4951860Sbostic }; 5051860Sbostic 5151188Sbostic /* 5251860Sbostic * Determine if it's OK to start a partial in this segment, or if we need 5351860Sbostic * to go on to a new segment. 5451301Sbostic */ 5551860Sbostic #define LFS_PARTIAL_FITS(fs) \ 5651860Sbostic ((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \ 5751860Sbostic 1 << (fs)->lfs_fsbtodb) 5851188Sbostic 5951860Sbostic #define datosn(fs, daddr) /* disk address to segment number */ \ 6051860Sbostic (((daddr) - (fs)->lfs_sboffs[0]) / fsbtodb((fs), (fs)->lfs_ssize)) 6151860Sbostic 6251860Sbostic #define sntoda(fs, sn) /* segment number to disk address */ \ 6351860Sbostic ((daddr_t)((sn) * ((fs)->lfs_ssize << (fs)->lfs_fsbtodb) + \ 6451860Sbostic (fs)->lfs_sboffs[0])) 6551860Sbostic 6651860Sbostic static int lfs_callback __P((BUF *)); 6751860Sbostic static void lfs_gather __P((struct lfs *, 6851860Sbostic SEGMENT *, VNODE *, int (*) __P((struct lfs *, BUF *)))); 69*51915Sbostic static void lfs_initseg __P((struct lfs *, SEGMENT *)); 7051860Sbostic static BUF *lfs_newbuf __P((struct lfs *, SEGMENT *, daddr_t, size_t)); 71*51915Sbostic static daddr_t lfs_newseg __P((struct lfs *)); 7251499Sbostic static void lfs_updatemeta __P((struct lfs *, 7351860Sbostic SEGMENT *, VNODE *, daddr_t *, BUF **, int)); 7451860Sbostic static void lfs_writefile __P((struct lfs *, SEGMENT *, VNODE *)); 7551860Sbostic static void lfs_writeinode __P((struct lfs *, SEGMENT *, INODE *)); 7651499Sbostic static void lfs_writeseg __P((struct lfs *, SEGMENT *)); 7751860Sbostic static void lfs_writesuper __P((struct lfs *, SEGMENT *)); 7851860Sbostic static int match_data __P((struct lfs *, BUF *)); 7951860Sbostic static int match_dindir __P((struct lfs *, BUF *)); 8051860Sbostic static int match_indir __P((struct lfs *, BUF *)); 8151860Sbostic static int match_tindir __P((struct lfs *, BUF *)); 8251215Sbostic static void shellsort __P((BUF **, daddr_t *, register int)); 8351188Sbostic 8451860Sbostic int lfs_allclean_wakeup; /* Cleaner wakeup address. */ 8551860Sbostic 8651188Sbostic int 8751215Sbostic lfs_segwrite(mp, do_ckp) 8851188Sbostic MOUNT *mp; 8951860Sbostic int do_ckp; /* Do a checkpoint. */ 9051188Sbostic { 9151188Sbostic INODE *ip; 9251499Sbostic struct lfs *fs; 9351188Sbostic VNODE *vp; 9451188Sbostic SEGMENT *sp; 95*51915Sbostic int s, error; 9651188Sbostic 9751860Sbostic #ifdef VERBOSE 9851860Sbostic printf("lfs_segwrite\n"); 9951860Sbostic #endif 10051860Sbostic /* 10151860Sbostic * If doing a checkpoint, we keep a cumulative count of the outstanding 10251860Sbostic * I/O operations. If the disk drive catches up with us it could go to 10351860Sbostic * zero before we finish, so we artificially increment it by one until 10451860Sbostic * we've scheduled all of the writes we intend to do. 10551860Sbostic */ 106*51915Sbostic fs = VFSTOUFS(mp)->um_lfs; 10751860Sbostic if (do_ckp) { 10851860Sbostic s = splbio(); 10951860Sbostic fs->lfs_iocount = 1; 11051860Sbostic splx(s); 11151860Sbostic } 11251342Sbostic 11351301Sbostic /* 11451860Sbostic * Allocate a segment structure and enough space to hold pointers to 11551860Sbostic * the maximum possible number of buffers which can be described in a 11651860Sbostic * single summary block. 11751301Sbostic */ 11851860Sbostic sp = malloc(sizeof(SEGMENT), M_SEGMENT, M_WAITOK); 11951860Sbostic sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 12051860Sbostic sizeof(daddr_t) + 1) * sizeof(BUF *), M_SEGMENT, M_WAITOK); 12151860Sbostic sp->seg_flags = do_ckp ? SEGM_CKP : 0; 122*51915Sbostic lfs_initseg(fs, sp); 12351188Sbostic loop: 12451188Sbostic for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { 12551188Sbostic /* 12651188Sbostic * If the vnode that we are about to sync is no longer 12751188Sbostic * associated with this mount point, start over. 12851188Sbostic */ 12951188Sbostic if (vp->v_mount != mp) 13051188Sbostic goto loop; 13151188Sbostic if (VOP_ISLOCKED(vp)) 13251188Sbostic continue; 13351860Sbostic 134*51915Sbostic /* 135*51915Sbostic * Write the inode/file if dirty and it's not the 136*51915Sbostic * the IFILE. 137*51915Sbostic */ 13851188Sbostic ip = VTOI(vp); 13951860Sbostic if (ip->i_flag & (IMOD | IACC | IUPD | ICHG) == 0 && 14051860Sbostic vp->v_dirtyblkhd == NULL || 14151860Sbostic ip->i_number == LFS_IFILE_INUM) 14251188Sbostic continue; 14351860Sbostic 14451188Sbostic if (vget(vp)) 14551188Sbostic goto loop; 14651860Sbostic lfs_writefile(fs, sp, vp); 147*51915Sbostic lfs_writeinode(fs, sp, ip); 14851188Sbostic vput(vp); 14951188Sbostic } 15051860Sbostic if (do_ckp) { 15151860Sbostic lfs_writefile(fs, sp, fs->lfs_ivnode); 15251860Sbostic lfs_writeinode(fs, sp, VTOI(fs->lfs_ivnode)); 15351860Sbostic } 15451342Sbostic lfs_writeseg(fs, sp); 15551342Sbostic 15651215Sbostic /* 15751860Sbostic * If the I/O count is non-zero, sleep until it reaches zero. At the 15851860Sbostic * moment, the user's process hangs around so we can sleep. 15951215Sbostic */ 16051860Sbostic if (do_ckp) { 16151860Sbostic s = splbio(); 162*51915Sbostic if (--fs->lfs_iocount && 163*51915Sbostic (error = tsleep(&fs->lfs_iocount, PRIBIO + 1, "sync", 0))) 164*51915Sbostic return (error); 16551860Sbostic splx(s); 16651860Sbostic lfs_writesuper(fs, sp); 16751860Sbostic } 16851215Sbostic 16951860Sbostic (void)free(sp->bpp, M_SEGMENT); 17051860Sbostic (void)free(sp, M_SEGMENT); 17151215Sbostic 17251860Sbostic /* Wake up any cleaning processes waiting on this file system. */ 17351860Sbostic wakeup(&fs->lfs_nextseg); 17451860Sbostic wakeup(&lfs_allclean_wakeup); 175*51915Sbostic printf("sync returned\n"); 17651860Sbostic return (0); 17751188Sbostic } 17851188Sbostic 17951860Sbostic /* 18051860Sbostic * Write the dirty blocks associated with a vnode. 18151860Sbostic */ 18251188Sbostic static void 18351860Sbostic lfs_writefile(fs, sp, vp) 18451499Sbostic struct lfs *fs; 18551188Sbostic SEGMENT *sp; 18651860Sbostic VNODE *vp; 18751188Sbostic { 18851860Sbostic struct buf *bp; 18951860Sbostic FINFO *fip; 19051860Sbostic IFILE *ifp; 19151860Sbostic ino_t inum; 19251188Sbostic 19351860Sbostic #ifdef VERBOSE 19451860Sbostic printf("lfs_writefile\n"); 19551860Sbostic #endif 19651860Sbostic inum = VTOI(vp)->i_number; 19751860Sbostic if (vp->v_dirtyblkhd != NULL) { 19851860Sbostic if (sp->seg_bytes_left < fs->lfs_bsize || 19951860Sbostic sp->sum_bytes_left < sizeof(FINFO)) { 20051860Sbostic lfs_writeseg(fs, sp); 201*51915Sbostic lfs_initseg(fs, sp); 20251860Sbostic } 20351860Sbostic sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t); 20451215Sbostic 20551860Sbostic fip = sp->fip; 20651860Sbostic fip->fi_nblocks = 0; 20751860Sbostic if (inum == LFS_IFILE_INUM) 20851860Sbostic fip->fi_version = 1; 20951860Sbostic else { 21051860Sbostic LFS_IENTRY(ifp, fs, inum, bp); 21151860Sbostic fip->fi_version = ifp->if_version; 21251860Sbostic brelse(bp); 21351860Sbostic } 21451860Sbostic fip->fi_ino = inum; 21551188Sbostic 21651860Sbostic /* 21751860Sbostic * It may not be necessary to write the meta-data blocks 21851860Sbostic * at this point, as the roll-forward recovery code should 21951860Sbostic * be able to reconstruct the list. 22051860Sbostic */ 22151860Sbostic lfs_gather(fs, sp, vp, match_data); 22251860Sbostic lfs_gather(fs, sp, vp, match_indir); 22351860Sbostic lfs_gather(fs, sp, vp, match_dindir); 22451860Sbostic #ifdef TRIPLE 22551860Sbostic lfs_gather(fs, sp, vp, match_tindir); 22651860Sbostic #endif 22751342Sbostic 22851860Sbostic fip = sp->fip; 22951860Sbostic #ifdef META 23051860Sbostic printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks); 23151860Sbostic #endif 23251860Sbostic if (fip->fi_nblocks != 0) { 23351860Sbostic ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 23451860Sbostic sp->fip = (FINFO *)((caddr_t)fip + sizeof(FINFO) + 23551860Sbostic sizeof(daddr_t) * (fip->fi_nblocks - 1)); 23651860Sbostic } 23751860Sbostic } 23851215Sbostic } 23951215Sbostic 24051860Sbostic static void 241*51915Sbostic lfs_writeinode(fs, sp, ip) 242*51915Sbostic struct lfs *fs; 243*51915Sbostic SEGMENT *sp; 244*51915Sbostic INODE *ip; 245*51915Sbostic { 246*51915Sbostic BUF *bp; 247*51915Sbostic daddr_t next_addr; 248*51915Sbostic int ndx; 249*51915Sbostic 250*51915Sbostic #ifdef VERBOSE 251*51915Sbostic printf("lfs_writeinode\n"); 252*51915Sbostic #endif 253*51915Sbostic /* Allocate a new inode block if necessary. */ 254*51915Sbostic if (sp->ibp == NULL) { 255*51915Sbostic /* Allocate a new segment if necessary. */ 256*51915Sbostic if (sp->seg_bytes_left < fs->lfs_bsize || 257*51915Sbostic sp->sum_bytes_left < sizeof(daddr_t)) { 258*51915Sbostic lfs_writeseg(fs, sp); 259*51915Sbostic lfs_initseg(fs, sp); 260*51915Sbostic } 261*51915Sbostic 262*51915Sbostic /* Get next inode block. */ 263*51915Sbostic next_addr = fs->lfs_offset; 264*51915Sbostic fs->lfs_offset += fsbtodb(fs, 1); 265*51915Sbostic sp->ibp = *sp->cbpp++ = 266*51915Sbostic lfs_newbuf(fs, sp, next_addr, fs->lfs_bsize); 267*51915Sbostic 268*51915Sbostic /* Set remaining space counter. */ 269*51915Sbostic sp->seg_bytes_left -= fs->lfs_bsize; 270*51915Sbostic sp->sum_bytes_left -= sizeof(daddr_t); 271*51915Sbostic ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) - 272*51915Sbostic sp->ninodes / INOPB(fs) - 1; 273*51915Sbostic ((daddr_t *)(sp->segsum))[ndx] = next_addr; 274*51915Sbostic } 275*51915Sbostic 276*51915Sbostic /* Copy the new inode onto the inode page. 277*51915Sbostic * XXX 278*51915Sbostic * Do struct assignment. 279*51915Sbostic */ 280*51915Sbostic bp = sp->ibp; 281*51915Sbostic bcopy(&ip->i_din, 282*51915Sbostic bp->b_un.b_dino + (sp->ninodes % INOPB(fs)), sizeof(DINODE)); 283*51915Sbostic 284*51915Sbostic /* Increment inode count in segment summary block. */ 285*51915Sbostic ++((SEGSUM *)(sp->segsum))->ss_ninos; 286*51915Sbostic 287*51915Sbostic /* If this page is full, set flag to allocate a new page. */ 288*51915Sbostic if (++sp->ninodes % INOPB(fs) == 0) 289*51915Sbostic sp->ibp = NULL; 290*51915Sbostic 291*51915Sbostic /* 292*51915Sbostic * If updating the ifile, update the super-block; otherwise, update 293*51915Sbostic * the ifile itself. In either case, turn off inode update flags. 294*51915Sbostic */ 295*51915Sbostic if (ip->i_number == LFS_IFILE_INUM) 296*51915Sbostic fs->lfs_idaddr = bp->b_blkno; 297*51915Sbostic else 298*51915Sbostic lfs_iset(ip, bp->b_blkno, ip->i_atime); 299*51915Sbostic ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 300*51915Sbostic } 301*51915Sbostic 302*51915Sbostic static void 30351215Sbostic lfs_gather(fs, sp, vp, match) 30451499Sbostic struct lfs *fs; 30551215Sbostic SEGMENT *sp; 30651215Sbostic VNODE *vp; 30751860Sbostic int (*match) __P((struct lfs *, BUF *)); 30851215Sbostic { 30951215Sbostic BUF **bpp, *bp, *nbp; 31051215Sbostic FINFO *fip; 31151215Sbostic INODE *ip; 31251215Sbostic daddr_t *lbp, *start_lbp; 31351342Sbostic u_long version; 31451342Sbostic int s; 31551215Sbostic 31651860Sbostic #ifdef VERBOSE 31751860Sbostic printf("lfs_gather\n"); 31851860Sbostic #endif 31951215Sbostic ip = VTOI(vp); 32051215Sbostic bpp = sp->cbpp; 32151215Sbostic fip = sp->fip; 32251215Sbostic start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks]; 32351215Sbostic 32451215Sbostic s = splbio(); 32551215Sbostic for (bp = vp->v_dirtyblkhd; bp; bp = nbp) { 32651215Sbostic nbp = bp->b_blockf; 327*51915Sbostic /* 328*51915Sbostic * XXX 329*51915Sbostic * Should probably sleep on any BUSY buffer if 330*51915Sbostic * doing an fsync? 331*51915Sbostic */ 33251342Sbostic if (bp->b_flags & B_BUSY) 33351215Sbostic continue; 33451342Sbostic #ifdef DIAGNOSTIC 33551860Sbostic if (!(bp->b_flags & B_DELWRI)) 336*51915Sbostic panic("lfs_gather: bp not B_DELWRI"); 33751860Sbostic if (!(bp->b_flags & B_LOCKED)) 338*51915Sbostic panic("lfs_gather: bp not B_LOCKED"); 33951342Sbostic #endif 34051860Sbostic if (!match(fs, bp)) 34151215Sbostic continue; 34251342Sbostic 34351342Sbostic /* Insert into the buffer list, update the FINFO block. */ 34451342Sbostic *sp->cbpp++ = bp; 34551342Sbostic ++fip->fi_nblocks; 34651215Sbostic *lbp++ = bp->b_lblkno; 34751342Sbostic 34851860Sbostic /* 34951860Sbostic * If full, finish this segment. We may be doing I/O, so 35051860Sbostic * release and reacquire the splbio(). 35151860Sbostic */ 35251215Sbostic sp->sum_bytes_left -= sizeof(daddr_t); 35351215Sbostic sp->seg_bytes_left -= bp->b_bufsize; 35451342Sbostic if (sp->sum_bytes_left < sizeof(daddr_t) || 35551215Sbostic sp->seg_bytes_left < fs->lfs_bsize) { 35651215Sbostic splx(s); 35751342Sbostic lfs_updatemeta(fs, 35851860Sbostic sp, vp, start_lbp, bpp, lbp - start_lbp); 35951215Sbostic 36051342Sbostic /* Add the current file to the segment summary. */ 36151342Sbostic ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 36251215Sbostic 36351342Sbostic version = fip->fi_version; 36451860Sbostic lfs_writeseg(fs, sp); 365*51915Sbostic lfs_initseg(fs, sp); 36651342Sbostic 36751215Sbostic fip = sp->fip; 36851342Sbostic fip->fi_version = version; 36951215Sbostic fip->fi_ino = ip->i_number; 37051342Sbostic start_lbp = lbp = fip->fi_blocks; 37151342Sbostic 37251215Sbostic bpp = sp->cbpp; 37351215Sbostic s = splbio(); 37451215Sbostic } 37551188Sbostic } 37651215Sbostic splx(s); 37751860Sbostic lfs_updatemeta(fs, sp, vp, start_lbp, bpp, lbp - start_lbp); 37851188Sbostic } 37951188Sbostic 38051342Sbostic /* 38151342Sbostic * Update the metadata that points to the blocks listed in the FINFO 38251188Sbostic * array. 38351188Sbostic */ 38451215Sbostic static void 38551860Sbostic lfs_updatemeta(fs, sp, vp, lbp, bpp, nblocks) 38651499Sbostic struct lfs *fs; 38751215Sbostic SEGMENT *sp; 38851860Sbostic VNODE *vp; 38951215Sbostic daddr_t *lbp; 39051188Sbostic BUF **bpp; 39151215Sbostic int nblocks; 39251188Sbostic { 393*51915Sbostic SEGUSE *sup; 394*51915Sbostic BUF *bp; 39551860Sbostic INDIR a[NIADDR], *ap; 39651860Sbostic INODE *ip; 397*51915Sbostic daddr_t daddr, lbn, off; 39851860Sbostic int db_per_fsb, error, i, num; 39951188Sbostic 40051860Sbostic #ifdef VERBOSE 40151860Sbostic printf("lfs_updatemeta\n"); 40251860Sbostic #endif 40351342Sbostic if (nblocks == 0) 40451215Sbostic return; 40551215Sbostic 406*51915Sbostic /* Sort the blocks. */ 40751215Sbostic shellsort(bpp, lbp, nblocks); 40851215Sbostic 409*51915Sbostic /* 410*51915Sbostic * Assign disk addresses, and update references to the logical 411*51915Sbostic * block and the segment usage information. 412*51915Sbostic */ 41351860Sbostic db_per_fsb = fsbtodb(fs, 1); 414*51915Sbostic for (i = nblocks; i--; ++bpp) { 415*51915Sbostic lbn = *lbp++; 416*51915Sbostic (*bpp)->b_blkno = off = fs->lfs_offset; 41751860Sbostic fs->lfs_offset += db_per_fsb; 41851215Sbostic 41951860Sbostic if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num)) 420*51915Sbostic panic("lfs_updatemeta: lfs_bmaparray returned %d", 421*51915Sbostic error); 42251860Sbostic #ifdef META 42351860Sbostic printf("daddr: %d num: %d\n", daddr, num); 42451860Sbostic if (num != 0) { 42551860Sbostic int x; 426*51915Sbostic printf("array from bmaparray:\n"); 42751860Sbostic for (x = 0; x < num; x++) 42851860Sbostic printf("\tlbn %d off %d\n", a[x].in_lbn, a[x].in_off); 42951860Sbostic } 43051860Sbostic #endif 43151860Sbostic ip = VTOI(vp); 43251860Sbostic switch (num) { 43351860Sbostic case 0: 43451356Sbostic #ifdef META 43551860Sbostic printf("update inode for direct block %d\n", lbn); 43651356Sbostic #endif 437*51915Sbostic ip->i_db[lbn] = off; 43851860Sbostic break; 43951860Sbostic case 1: 440*51915Sbostic ip->i_ib[a[0].in_off] = off; 44151860Sbostic break; 44251860Sbostic default: 44351860Sbostic ap = &a[num - 1]; 44451356Sbostic #ifdef META 44551860Sbostic printf("update indirect block %d offset %d\n", 44651860Sbostic ap->in_lbn, ap->in_off); 44751356Sbostic #endif 44851860Sbostic if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp)) 44951860Sbostic panic("lfs_updatemeta: bread bno %d", 45051860Sbostic ap->in_lbn); 451*51915Sbostic bp->b_un.b_daddr[ap->in_off] = off; 45251342Sbostic lfs_bwrite(bp); 45351188Sbostic } 454*51915Sbostic 455*51915Sbostic /* Update segment usage information. */ 456*51915Sbostic if (daddr != UNASSIGNED) { 457*51915Sbostic LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 458*51915Sbostic sup->su_lastmod = time.tv_sec; 459*51915Sbostic #ifdef DIAGNOSTIC 460*51915Sbostic if (sup->su_nbytes < fs->lfs_bsize) 461*51915Sbostic panic("lfs: negative bytes (segment %d)\n", 462*51915Sbostic datosn(fs, daddr)); 463*51915Sbostic #endif 464*51915Sbostic sup->su_nbytes -= fs->lfs_bsize; 465*51915Sbostic lfs_bwrite(bp); 466*51915Sbostic } 46751188Sbostic } 46851188Sbostic } 46951188Sbostic 470*51915Sbostic /* 471*51915Sbostic * Start a new segment. 472*51915Sbostic */ 47351860Sbostic static void 474*51915Sbostic lfs_initseg(fs, sp) 47551499Sbostic struct lfs *fs; 47651215Sbostic SEGMENT *sp; 47751188Sbostic { 478*51915Sbostic SEGUSE *sup; 479*51915Sbostic SEGSUM *ssp; 480*51915Sbostic struct buf *bp; 481*51915Sbostic daddr_t lbn, *lbnp; 48251215Sbostic 48351860Sbostic #ifdef VERBOSE 484*51915Sbostic printf("lfs_initseg\n"); 48551860Sbostic #endif 486*51915Sbostic /* Advance to the next segment. */ 487*51915Sbostic if (1 || !LFS_PARTIAL_FITS(fs)) { 488*51915Sbostic LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_curseg), bp); 489*51915Sbostic sup->su_flags &= ~SEGUSE_ACTIVE; 490*51915Sbostic lfs_bwrite(bp); 491*51915Sbostic fs->lfs_curseg = fs->lfs_offset = fs->lfs_nextseg; 492*51915Sbostic fs->lfs_nextseg = lfs_newseg(fs); 493*51915Sbostic sp->seg_number = datosn(fs, fs->lfs_curseg); 494*51915Sbostic sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE; 495*51915Sbostic 496*51915Sbostic /* 497*51915Sbostic * If su_nbytes is non-zero after the segment was cleaned, 498*51915Sbostic * the segment contains a super-block. Update offset and 499*51915Sbostic * summary address to skip over the superblock. 500*51915Sbostic */ 501*51915Sbostic LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 502*51915Sbostic if (sup->su_nbytes != 0) { 503*51915Sbostic fs->lfs_offset += LFS_SBPAD / DEV_BSIZE; 504*51915Sbostic sp->seg_bytes_left -= LFS_SBPAD; 50551215Sbostic } 506*51915Sbostic brelse(bp); 507*51915Sbostic } else { 508*51915Sbostic sp->seg_number = datosn(fs, fs->lfs_curseg); 509*51915Sbostic sp->seg_bytes_left = (fs->lfs_dbpseg - 510*51915Sbostic (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE; 511*51915Sbostic } 51251342Sbostic 513*51915Sbostic sp->ibp = NULL; 514*51915Sbostic sp->ninodes = 0; 51551342Sbostic 516*51915Sbostic /* Get a new buffer for SEGSUM and enter it into the buffer list. */ 517*51915Sbostic sp->cbpp = sp->bpp; 518*51915Sbostic *sp->cbpp = lfs_newbuf(fs, sp, fs->lfs_offset, LFS_SUMMARY_SIZE); 519*51915Sbostic sp->segsum = (*sp->cbpp)->b_un.b_addr; 520*51915Sbostic ++sp->cbpp; 521*51915Sbostic fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE; 52251342Sbostic 523*51915Sbostic /* Set point to SEGSUM, initialize it. */ 524*51915Sbostic ssp = sp->segsum; 525*51915Sbostic ssp->ss_next = fs->lfs_nextseg; 526*51915Sbostic ssp->ss_create = time.tv_sec; 527*51915Sbostic ssp->ss_nfinfo = ssp->ss_ninos = 0; 52851342Sbostic 529*51915Sbostic /* Set pointer to first FINFO, initialize it. */ 530*51915Sbostic sp->fip = (FINFO *)(sp->segsum + sizeof(SEGSUM)); 531*51915Sbostic sp->fip->fi_nblocks = 0; 53251342Sbostic 533*51915Sbostic sp->seg_bytes_left -= LFS_SUMMARY_SIZE; 534*51915Sbostic sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM); 535*51915Sbostic } 53651342Sbostic 537*51915Sbostic /* 538*51915Sbostic * Return the next segment to write. 539*51915Sbostic */ 540*51915Sbostic static daddr_t 541*51915Sbostic lfs_newseg(fs) 542*51915Sbostic struct lfs *fs; 543*51915Sbostic { 544*51915Sbostic SEGUSE *sup; 545*51915Sbostic struct buf *bp; 546*51915Sbostic int isdirty, segnum, sn; 547*51915Sbostic 548*51915Sbostic #ifdef VERBOSE 549*51915Sbostic printf("lfs_newseg\n"); 550*51915Sbostic #endif 551*51915Sbostic segnum = datosn(fs, fs->lfs_nextseg); 552*51915Sbostic LFS_SEGENTRY(sup, fs, segnum, bp); 553*51915Sbostic sup->su_flags |= SEGUSE_ACTIVE; 554*51915Sbostic lfs_bwrite(bp); 555*51915Sbostic for (sn = segnum;;) { 556*51915Sbostic sn = (sn + 1) % fs->lfs_nseg; 557*51915Sbostic if (sn == segnum) 558*51915Sbostic panic("lfs_nextseg: no clean segments"); 559*51915Sbostic LFS_SEGENTRY(sup, fs, sn, bp); 560*51915Sbostic isdirty = sup->su_flags & SEGUSE_DIRTY; 561*51915Sbostic brelse(bp); 562*51915Sbostic if (!isdirty) 563*51915Sbostic break; 564*51915Sbostic } 565*51915Sbostic return (sntoda(fs, sn)); 56651188Sbostic } 56751188Sbostic 56851188Sbostic static void 56951188Sbostic lfs_writeseg(fs, sp) 57051499Sbostic struct lfs *fs; 57151188Sbostic SEGMENT *sp; 57251188Sbostic { 573*51915Sbostic BUF **bpp, *bp; 57451188Sbostic SEGUSE *sup; 57551860Sbostic SEGSUM *segp; 57651860Sbostic dev_t i_dev; 57751860Sbostic u_long *datap, *dp; 57851342Sbostic void *pmeta; 57951860Sbostic int flags, i, nblocks, s, (*strategy) __P((BUF *)); 58051188Sbostic 58151860Sbostic #ifdef VERBOSE 58251860Sbostic printf("lfs_writeseg\n"); 58351860Sbostic #endif 58451342Sbostic /* Update superblock segment address. */ 58551342Sbostic fs->lfs_lastseg = sntoda(fs, sp->seg_number); 58651860Sbostic nblocks = sp->cbpp - sp->bpp; 58751860Sbostic 58851860Sbostic LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 58951860Sbostic sup->su_nbytes += LFS_SUMMARY_SIZE + (nblocks - 1 << fs->lfs_bshift); 59051860Sbostic sup->su_lastmod = time.tv_sec; 59151860Sbostic sup->su_flags = SEGUSE_DIRTY; 59251860Sbostic lfs_bwrite(bp); 59351188Sbostic 59451188Sbostic /* 595*51915Sbostic * Compute checksum across data and then across summary; 596*51915Sbostic * the first block (the summary block) is skipped. 59751860Sbostic * 59851860Sbostic * XXX 59951860Sbostic * Fix this to do it inline, instead of malloc/copy. 60051188Sbostic */ 60151860Sbostic datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK); 602*51915Sbostic for (bpp = sp->bpp, i = nblocks - 1; i--;) 603*51915Sbostic *dp++ = (*++bpp)->b_un.b_words[0]; 60451860Sbostic 60551860Sbostic segp = (SEGSUM *)sp->segsum; 60651860Sbostic segp->ss_datasum = cksum(datap, nblocks * sizeof(u_long)); 60751860Sbostic segp->ss_sumsum = cksum(&segp->ss_datasum, 60851860Sbostic LFS_SUMMARY_SIZE - sizeof(segp->ss_sumsum)); 60951860Sbostic (void)free(datap, M_SEGMENT); 61051188Sbostic 61151188Sbostic /* 61251860Sbostic * When we gathered the blocks for I/O we did not mark them busy or 61351860Sbostic * remove them from the freelist. As we do this, turn off the B_LOCKED 61451860Sbostic * bit so the future brelse will put them on the LRU list, and add the 61551860Sbostic * B_CALL flags if we're doing a checkpoint so we can count I/O's. LFS 61651860Sbostic * requires that the super blocks (on checkpoint) be written after all 61751860Sbostic * the segment data. 61851188Sbostic */ 61951860Sbostic i_dev = VTOI(fs->lfs_ivnode)->i_dev; 62051860Sbostic strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy; 62151301Sbostic 62251301Sbostic s = splbio(); 62351860Sbostic if (sp->seg_flags & SEGM_CKP) { 62451860Sbostic fs->lfs_iocount += nblocks; 625*51915Sbostic flags = B_ASYNC | B_BUSY | B_CALL; 62651860Sbostic } else 627*51915Sbostic flags = B_ASYNC | B_BUSY; 62851860Sbostic for (bpp = sp->bpp, i = nblocks; i--;) { 62951860Sbostic bp = *bpp++; 63051860Sbostic bp->b_flags |= flags; 631*51915Sbostic bp->b_flags &= 632*51915Sbostic ~(B_DONE | B_ERROR | B_READ | B_DELWRI | B_LOCKED); 63351860Sbostic bp->b_dev = i_dev; 63451860Sbostic bp->b_iodone = lfs_callback; 63551860Sbostic if (!(bp->b_flags & B_NOCACHE)) { 63651860Sbostic bremfree(bp); 63751860Sbostic reassignbuf(bp, bp->b_vp); 63851860Sbostic } 63951860Sbostic } 64051301Sbostic splx(s); 64151860Sbostic 64251860Sbostic for (bpp = sp->bpp, i = nblocks; i--;) 64351860Sbostic (strategy)(*bpp++); 64451188Sbostic } 64551188Sbostic 64651215Sbostic static void 64751860Sbostic lfs_writesuper(fs, sp) 64851499Sbostic struct lfs *fs; 64951860Sbostic SEGMENT *sp; 65051301Sbostic { 65151301Sbostic BUF *bp; 65251860Sbostic dev_t i_dev; 65351342Sbostic int (*strategy) __P((BUF *)); 65451301Sbostic 655*51915Sbostic return; 65651860Sbostic #ifdef VERBOSE 65751860Sbostic printf("lfs_writesuper\n"); 65851860Sbostic #endif 65951860Sbostic i_dev = VTOI(fs->lfs_ivnode)->i_dev; 66051860Sbostic strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy; 66151356Sbostic 66251342Sbostic /* Checksum the superblock and copy it into a buffer. */ 66351499Sbostic fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum)); 66451860Sbostic bp = lfs_newbuf(fs, sp, fs->lfs_sboffs[0], LFS_SBPAD); 66551860Sbostic *bp->b_un.b_lfs = *fs; 66651215Sbostic 66751356Sbostic /* Write the first superblock (wait). */ 66851860Sbostic bp->b_dev = i_dev; 669*51915Sbostic bp->b_flags |= B_BUSY; 67051860Sbostic bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); 67151342Sbostic (strategy)(bp); 67251215Sbostic biowait(bp); 67351342Sbostic 67451356Sbostic /* Write the second superblock (don't wait). */ 67551215Sbostic bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1]; 676*51915Sbostic bp->b_flags |= B_ASYNC | B_BUSY; 67751860Sbostic bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); 67851342Sbostic (strategy)(bp); 67951215Sbostic } 68051215Sbostic 68151342Sbostic /* 68251342Sbostic * Logical block number match routines used when traversing the dirty block 68351342Sbostic * chain. 68451342Sbostic */ 68551490Sbostic static int 68651860Sbostic match_data(fs, bp) 68751860Sbostic struct lfs *fs; 68851215Sbostic BUF *bp; 68951215Sbostic { 69051342Sbostic return (bp->b_lblkno >= 0); 69151215Sbostic } 69251215Sbostic 69351490Sbostic static int 69451860Sbostic match_indir(fs, bp) 69551860Sbostic struct lfs *fs; 69651215Sbostic BUF *bp; 69751215Sbostic { 69851860Sbostic int lbn; 69951860Sbostic 70051860Sbostic lbn = bp->b_lblkno; 70151860Sbostic return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0); 70251215Sbostic } 70351215Sbostic 70451490Sbostic static int 70551860Sbostic match_dindir(fs, bp) 70651860Sbostic struct lfs *fs; 70751215Sbostic BUF *bp; 70851215Sbostic { 70951860Sbostic int lbn; 71051860Sbostic 71151860Sbostic lbn = bp->b_lblkno; 71251860Sbostic return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1); 71351215Sbostic } 71451215Sbostic 71551860Sbostic static int 71651860Sbostic match_tindir(fs, bp) 71751499Sbostic struct lfs *fs; 71851860Sbostic BUF *bp; 71951342Sbostic { 72051860Sbostic int lbn; 72151342Sbostic 72251860Sbostic lbn = bp->b_lblkno; 72351860Sbostic return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2); 72451860Sbostic } 72551342Sbostic 72651860Sbostic /* 72751860Sbostic * Allocate a new buffer header. 72851860Sbostic */ 72951860Sbostic static BUF * 73051860Sbostic lfs_newbuf(fs, sp, daddr, size) 73151860Sbostic struct lfs *fs; 73251860Sbostic SEGMENT *sp; 73351860Sbostic daddr_t daddr; 73451860Sbostic size_t size; 73551860Sbostic { 73651860Sbostic BUF *bp; 73751342Sbostic 73851860Sbostic #ifdef VERBOSE 73951860Sbostic printf("lfs_newbuf\n"); 74051860Sbostic #endif 74151860Sbostic bp = getnewbuf(); 74251860Sbostic bremhash(bp); 74351860Sbostic bgetvp(fs->lfs_ivnode, bp); 74451860Sbostic bp->b_bcount = 0; 74551860Sbostic bp->b_lblkno = daddr; 74651860Sbostic bp->b_blkno = daddr; 74751860Sbostic bp->b_error = 0; 74851860Sbostic bp->b_resid = 0; 74951860Sbostic allocbuf(bp, size); 75051860Sbostic bp->b_flags |= B_NOCACHE; 751*51915Sbostic binshash(bp, &bfreelist[BQ_AGE]); 75251860Sbostic return (bp); 75351860Sbostic } 75451342Sbostic 75551860Sbostic /* 75651860Sbostic * The buffer cache callback routine. 75751860Sbostic */ 75851860Sbostic static int /* XXX should be void */ 75951860Sbostic lfs_callback(bp) 76051860Sbostic BUF *bp; 76151860Sbostic { 76251860Sbostic struct lfs *fs; 76351342Sbostic 76451860Sbostic fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs; 76551860Sbostic #ifdef DIAGNOSTIC 76651860Sbostic if (fs->lfs_iocount == 0) 76751860Sbostic panic("lfs_callback: zero iocount\n"); 76851860Sbostic #endif 76951860Sbostic if (--fs->lfs_iocount == 0) 77051860Sbostic wakeup(&fs->lfs_iocount); 771*51915Sbostic 77251860Sbostic brelse(bp); 77351860Sbostic } 77451342Sbostic 77551215Sbostic /* 77651188Sbostic * Shellsort (diminishing increment sort) from Data Structures and 77751188Sbostic * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290; 77851188Sbostic * see also Knuth Vol. 3, page 84. The increments are selected from 77951188Sbostic * formula (8), page 95. Roughly O(N^3/2). 78051188Sbostic */ 78151188Sbostic /* 78251188Sbostic * This is our own private copy of shellsort because we want to sort 78351188Sbostic * two parallel arrays (the array of buffer pointers and the array of 78451188Sbostic * logical block numbers) simultaneously. Note that we cast the array 78551188Sbostic * of logical block numbers to a unsigned in this routine so that the 78651188Sbostic * negative block numbers (meta data blocks) sort AFTER the data blocks. 78751188Sbostic */ 78851188Sbostic static void 78951188Sbostic shellsort(bp_array, lb_array, nmemb) 79051188Sbostic BUF **bp_array; 79151215Sbostic daddr_t *lb_array; 79251188Sbostic register int nmemb; 79351188Sbostic { 79451188Sbostic static int __rsshell_increments[] = { 4, 1, 0 }; 79551188Sbostic register int incr, *incrp, t1, t2; 79651188Sbostic BUF *bp_temp; 79751188Sbostic u_long lb_temp; 79851188Sbostic 79951188Sbostic for (incrp = __rsshell_increments; incr = *incrp++;) 80051188Sbostic for (t1 = incr; t1 < nmemb; ++t1) 80151188Sbostic for (t2 = t1 - incr; t2 >= 0;) 80251188Sbostic if (lb_array[t2] > lb_array[t2 + incr]) { 80351188Sbostic lb_temp = lb_array[t2]; 80451188Sbostic lb_array[t2] = lb_array[t2 + incr]; 80551188Sbostic lb_array[t2 + incr] = lb_temp; 80651188Sbostic bp_temp = bp_array[t2]; 80751188Sbostic bp_array[t2] = bp_array[t2 + incr]; 80851188Sbostic bp_array[t2 + incr] = bp_temp; 80951188Sbostic t2 -= incr; 81051188Sbostic } else 81151188Sbostic break; 81251188Sbostic } 813