151188Sbostic /* 251188Sbostic * Copyright (c) 1991 Regents of the University of California. 351188Sbostic * All rights reserved. 451188Sbostic * 551188Sbostic * %sccs.include.redist.c% 651188Sbostic * 7*51927Sbostic * @(#)lfs_segment.c 7.6 (Berkeley) 12/14/91 851188Sbostic */ 951188Sbostic 1051490Sbostic #include <sys/param.h> 1151490Sbostic #include <sys/systm.h> 1251490Sbostic #include <sys/namei.h> 1351490Sbostic #include <sys/resourcevar.h> 1451490Sbostic #include <sys/kernel.h> 1551490Sbostic #include <sys/file.h> 1651490Sbostic #include <sys/stat.h> 1751490Sbostic #include <sys/buf.h> 1851490Sbostic #include <sys/proc.h> 1951490Sbostic #include <sys/conf.h> 2051490Sbostic #include <sys/vnode.h> 2151490Sbostic #include <sys/specdev.h> 2251490Sbostic #include <sys/fifo.h> 2351490Sbostic #include <sys/malloc.h> 2451490Sbostic #include <sys/mount.h> 2551915Sbostic #include <sys/kernel.h> /* XXX delete when time goes away */ 2651188Sbostic 2751499Sbostic #include <ufs/ufs/quota.h> 2851499Sbostic #include <ufs/ufs/inode.h> 2951499Sbostic #include <ufs/ufs/dir.h> 3051499Sbostic #include <ufs/ufs/ufsmount.h> 3151490Sbostic 3251499Sbostic #include <ufs/lfs/lfs.h> 3351499Sbostic #include <ufs/lfs/lfs_extern.h> 3451490Sbostic 3551860Sbostic /* In-memory description of a segment about to be written. */ 3651860Sbostic typedef struct segment SEGMENT; 3751860Sbostic struct segment { 3851860Sbostic BUF **bpp; /* pointer to buffer array */ 3951860Sbostic BUF **cbpp; /* pointer to next available bp */ 4051860Sbostic BUF *ibp; /* buffer pointer to inode page */ 4151860Sbostic void *segsum; /* segment summary info */ 4251860Sbostic u_long ninodes; /* number of inodes in this segment */ 4351860Sbostic u_long seg_bytes_left; /* bytes left in segment */ 4451860Sbostic u_long sum_bytes_left; /* bytes left in summary block */ 4551860Sbostic u_long seg_number; /* number of this segment */ 4651860Sbostic #define SEGM_CKP 0x01 /* doing a checkpoint */ 4751860Sbostic u_long seg_flags; /* run-time flags for this segment */ 4851860Sbostic FINFO *fip; /* current fileinfo pointer */ 4951860Sbostic }; 5051860Sbostic 5151188Sbostic /* 5251860Sbostic * Determine if it's OK to start a partial in this segment, or if we need 5351860Sbostic * to go on to a new segment. 5451301Sbostic */ 5551860Sbostic #define LFS_PARTIAL_FITS(fs) \ 5651860Sbostic ((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \ 5751860Sbostic 1 << (fs)->lfs_fsbtodb) 5851188Sbostic 5951860Sbostic #define datosn(fs, daddr) /* disk address to segment number */ \ 6051860Sbostic (((daddr) - (fs)->lfs_sboffs[0]) / fsbtodb((fs), (fs)->lfs_ssize)) 6151860Sbostic 6251860Sbostic #define sntoda(fs, sn) /* segment number to disk address */ \ 6351860Sbostic ((daddr_t)((sn) * ((fs)->lfs_ssize << (fs)->lfs_fsbtodb) + \ 6451860Sbostic (fs)->lfs_sboffs[0])) 6551860Sbostic 6651860Sbostic static int lfs_callback __P((BUF *)); 6751860Sbostic static void lfs_gather __P((struct lfs *, 6851860Sbostic SEGMENT *, VNODE *, int (*) __P((struct lfs *, BUF *)))); 6951915Sbostic static void lfs_initseg __P((struct lfs *, SEGMENT *)); 7051860Sbostic static BUF *lfs_newbuf __P((struct lfs *, SEGMENT *, daddr_t, size_t)); 71*51927Sbostic static void lfs_newseg __P((struct lfs *)); 7251499Sbostic static void lfs_updatemeta __P((struct lfs *, 7351860Sbostic SEGMENT *, VNODE *, daddr_t *, BUF **, int)); 7451860Sbostic static void lfs_writefile __P((struct lfs *, SEGMENT *, VNODE *)); 7551860Sbostic static void lfs_writeinode __P((struct lfs *, SEGMENT *, INODE *)); 7651499Sbostic static void lfs_writeseg __P((struct lfs *, SEGMENT *)); 7751860Sbostic static void lfs_writesuper __P((struct lfs *, SEGMENT *)); 7851860Sbostic static int match_data __P((struct lfs *, BUF *)); 7951860Sbostic static int match_dindir __P((struct lfs *, BUF *)); 8051860Sbostic static int match_indir __P((struct lfs *, BUF *)); 8151860Sbostic static int match_tindir __P((struct lfs *, BUF *)); 8251215Sbostic static void shellsort __P((BUF **, daddr_t *, register int)); 8351188Sbostic 8451860Sbostic int lfs_allclean_wakeup; /* Cleaner wakeup address. */ 8551860Sbostic 8651188Sbostic int 8751215Sbostic lfs_segwrite(mp, do_ckp) 8851188Sbostic MOUNT *mp; 8951860Sbostic int do_ckp; /* Do a checkpoint. */ 9051188Sbostic { 9151188Sbostic INODE *ip; 9251499Sbostic struct lfs *fs; 9351188Sbostic VNODE *vp; 9451188Sbostic SEGMENT *sp; 9551915Sbostic int s, error; 9651188Sbostic 9751860Sbostic #ifdef VERBOSE 9851860Sbostic printf("lfs_segwrite\n"); 9951860Sbostic #endif 10051860Sbostic /* 10151860Sbostic * If doing a checkpoint, we keep a cumulative count of the outstanding 10251860Sbostic * I/O operations. If the disk drive catches up with us it could go to 10351860Sbostic * zero before we finish, so we artificially increment it by one until 10451860Sbostic * we've scheduled all of the writes we intend to do. 10551860Sbostic */ 10651915Sbostic fs = VFSTOUFS(mp)->um_lfs; 10751860Sbostic if (do_ckp) { 10851860Sbostic s = splbio(); 10951860Sbostic fs->lfs_iocount = 1; 11051860Sbostic splx(s); 11151860Sbostic } 11251342Sbostic 11351301Sbostic /* 11451860Sbostic * Allocate a segment structure and enough space to hold pointers to 11551860Sbostic * the maximum possible number of buffers which can be described in a 11651860Sbostic * single summary block. 11751301Sbostic */ 11851860Sbostic sp = malloc(sizeof(SEGMENT), M_SEGMENT, M_WAITOK); 11951860Sbostic sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 12051860Sbostic sizeof(daddr_t) + 1) * sizeof(BUF *), M_SEGMENT, M_WAITOK); 12151860Sbostic sp->seg_flags = do_ckp ? SEGM_CKP : 0; 12251915Sbostic lfs_initseg(fs, sp); 12351188Sbostic loop: 12451188Sbostic for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { 12551188Sbostic /* 12651188Sbostic * If the vnode that we are about to sync is no longer 12751188Sbostic * associated with this mount point, start over. 12851188Sbostic */ 12951188Sbostic if (vp->v_mount != mp) 13051188Sbostic goto loop; 13151188Sbostic if (VOP_ISLOCKED(vp)) 13251188Sbostic continue; 13351860Sbostic 13451915Sbostic /* 13551915Sbostic * Write the inode/file if dirty and it's not the 13651915Sbostic * the IFILE. 13751915Sbostic */ 13851188Sbostic ip = VTOI(vp); 13951860Sbostic if (ip->i_flag & (IMOD | IACC | IUPD | ICHG) == 0 && 14051860Sbostic vp->v_dirtyblkhd == NULL || 14151860Sbostic ip->i_number == LFS_IFILE_INUM) 14251188Sbostic continue; 14351860Sbostic 14451188Sbostic if (vget(vp)) 14551188Sbostic goto loop; 14651860Sbostic lfs_writefile(fs, sp, vp); 14751915Sbostic lfs_writeinode(fs, sp, ip); 14851188Sbostic vput(vp); 14951188Sbostic } 15051860Sbostic if (do_ckp) { 15151860Sbostic lfs_writefile(fs, sp, fs->lfs_ivnode); 15251860Sbostic lfs_writeinode(fs, sp, VTOI(fs->lfs_ivnode)); 15351860Sbostic } 15451342Sbostic lfs_writeseg(fs, sp); 15551342Sbostic 15651215Sbostic /* 15751860Sbostic * If the I/O count is non-zero, sleep until it reaches zero. At the 15851860Sbostic * moment, the user's process hangs around so we can sleep. 15951215Sbostic */ 16051860Sbostic if (do_ckp) { 16151860Sbostic s = splbio(); 16251915Sbostic if (--fs->lfs_iocount && 16351915Sbostic (error = tsleep(&fs->lfs_iocount, PRIBIO + 1, "sync", 0))) 16451915Sbostic return (error); 16551860Sbostic splx(s); 16651860Sbostic lfs_writesuper(fs, sp); 16751860Sbostic } 16851215Sbostic 169*51927Sbostic free(sp->bpp, M_SEGMENT); 170*51927Sbostic free(sp, M_SEGMENT); 17151215Sbostic 17251860Sbostic /* Wake up any cleaning processes waiting on this file system. */ 17351860Sbostic wakeup(&fs->lfs_nextseg); 17451860Sbostic wakeup(&lfs_allclean_wakeup); 17551915Sbostic printf("sync returned\n"); 17651860Sbostic return (0); 17751188Sbostic } 17851188Sbostic 17951860Sbostic /* 18051860Sbostic * Write the dirty blocks associated with a vnode. 18151860Sbostic */ 18251188Sbostic static void 18351860Sbostic lfs_writefile(fs, sp, vp) 18451499Sbostic struct lfs *fs; 18551188Sbostic SEGMENT *sp; 18651860Sbostic VNODE *vp; 18751188Sbostic { 18851860Sbostic struct buf *bp; 18951860Sbostic FINFO *fip; 19051860Sbostic IFILE *ifp; 19151860Sbostic ino_t inum; 19251188Sbostic 19351860Sbostic #ifdef VERBOSE 19451860Sbostic printf("lfs_writefile\n"); 19551860Sbostic #endif 19651860Sbostic inum = VTOI(vp)->i_number; 19751860Sbostic if (vp->v_dirtyblkhd != NULL) { 19851860Sbostic if (sp->seg_bytes_left < fs->lfs_bsize || 19951860Sbostic sp->sum_bytes_left < sizeof(FINFO)) { 20051860Sbostic lfs_writeseg(fs, sp); 20151915Sbostic lfs_initseg(fs, sp); 20251860Sbostic } 20351860Sbostic sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t); 20451215Sbostic 20551860Sbostic fip = sp->fip; 20651860Sbostic fip->fi_nblocks = 0; 20751860Sbostic if (inum == LFS_IFILE_INUM) 20851860Sbostic fip->fi_version = 1; 20951860Sbostic else { 21051860Sbostic LFS_IENTRY(ifp, fs, inum, bp); 21151860Sbostic fip->fi_version = ifp->if_version; 21251860Sbostic brelse(bp); 21351860Sbostic } 21451860Sbostic fip->fi_ino = inum; 21551188Sbostic 21651860Sbostic /* 21751860Sbostic * It may not be necessary to write the meta-data blocks 21851860Sbostic * at this point, as the roll-forward recovery code should 21951860Sbostic * be able to reconstruct the list. 22051860Sbostic */ 22151860Sbostic lfs_gather(fs, sp, vp, match_data); 22251860Sbostic lfs_gather(fs, sp, vp, match_indir); 22351860Sbostic lfs_gather(fs, sp, vp, match_dindir); 22451860Sbostic #ifdef TRIPLE 22551860Sbostic lfs_gather(fs, sp, vp, match_tindir); 22651860Sbostic #endif 22751342Sbostic 22851860Sbostic fip = sp->fip; 22951860Sbostic #ifdef META 23051860Sbostic printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks); 23151860Sbostic #endif 23251860Sbostic if (fip->fi_nblocks != 0) { 23351860Sbostic ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 23451860Sbostic sp->fip = (FINFO *)((caddr_t)fip + sizeof(FINFO) + 23551860Sbostic sizeof(daddr_t) * (fip->fi_nblocks - 1)); 23651860Sbostic } 23751860Sbostic } 23851215Sbostic } 23951215Sbostic 24051860Sbostic static void 24151915Sbostic lfs_writeinode(fs, sp, ip) 24251915Sbostic struct lfs *fs; 24351915Sbostic SEGMENT *sp; 24451915Sbostic INODE *ip; 24551915Sbostic { 24651915Sbostic BUF *bp; 24751915Sbostic daddr_t next_addr; 24851915Sbostic int ndx; 24951915Sbostic 25051915Sbostic #ifdef VERBOSE 25151915Sbostic printf("lfs_writeinode\n"); 25251915Sbostic #endif 25351915Sbostic /* Allocate a new inode block if necessary. */ 25451915Sbostic if (sp->ibp == NULL) { 25551915Sbostic /* Allocate a new segment if necessary. */ 25651915Sbostic if (sp->seg_bytes_left < fs->lfs_bsize || 25751915Sbostic sp->sum_bytes_left < sizeof(daddr_t)) { 25851915Sbostic lfs_writeseg(fs, sp); 25951915Sbostic lfs_initseg(fs, sp); 26051915Sbostic } 26151915Sbostic 26251915Sbostic /* Get next inode block. */ 26351915Sbostic next_addr = fs->lfs_offset; 26451915Sbostic fs->lfs_offset += fsbtodb(fs, 1); 26551915Sbostic sp->ibp = *sp->cbpp++ = 26651915Sbostic lfs_newbuf(fs, sp, next_addr, fs->lfs_bsize); 26751915Sbostic 26851915Sbostic /* Set remaining space counter. */ 26951915Sbostic sp->seg_bytes_left -= fs->lfs_bsize; 27051915Sbostic sp->sum_bytes_left -= sizeof(daddr_t); 27151915Sbostic ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) - 27251915Sbostic sp->ninodes / INOPB(fs) - 1; 27351915Sbostic ((daddr_t *)(sp->segsum))[ndx] = next_addr; 27451915Sbostic } 27551915Sbostic 27651915Sbostic /* Copy the new inode onto the inode page. 27751915Sbostic * XXX 27851915Sbostic * Do struct assignment. 27951915Sbostic */ 28051915Sbostic bp = sp->ibp; 28151915Sbostic bcopy(&ip->i_din, 28251915Sbostic bp->b_un.b_dino + (sp->ninodes % INOPB(fs)), sizeof(DINODE)); 28351915Sbostic 28451915Sbostic /* Increment inode count in segment summary block. */ 28551915Sbostic ++((SEGSUM *)(sp->segsum))->ss_ninos; 28651915Sbostic 28751915Sbostic /* If this page is full, set flag to allocate a new page. */ 28851915Sbostic if (++sp->ninodes % INOPB(fs) == 0) 28951915Sbostic sp->ibp = NULL; 29051915Sbostic 29151915Sbostic /* 29251915Sbostic * If updating the ifile, update the super-block; otherwise, update 29351915Sbostic * the ifile itself. In either case, turn off inode update flags. 29451915Sbostic */ 29551915Sbostic if (ip->i_number == LFS_IFILE_INUM) 29651915Sbostic fs->lfs_idaddr = bp->b_blkno; 29751915Sbostic else 29851915Sbostic lfs_iset(ip, bp->b_blkno, ip->i_atime); 29951915Sbostic ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 30051915Sbostic } 30151915Sbostic 30251915Sbostic static void 30351215Sbostic lfs_gather(fs, sp, vp, match) 30451499Sbostic struct lfs *fs; 30551215Sbostic SEGMENT *sp; 30651215Sbostic VNODE *vp; 30751860Sbostic int (*match) __P((struct lfs *, BUF *)); 30851215Sbostic { 30951215Sbostic BUF **bpp, *bp, *nbp; 31051215Sbostic FINFO *fip; 31151215Sbostic INODE *ip; 31251215Sbostic daddr_t *lbp, *start_lbp; 31351342Sbostic u_long version; 31451342Sbostic int s; 31551215Sbostic 31651860Sbostic #ifdef VERBOSE 31751860Sbostic printf("lfs_gather\n"); 31851860Sbostic #endif 31951215Sbostic ip = VTOI(vp); 32051215Sbostic bpp = sp->cbpp; 32151215Sbostic fip = sp->fip; 32251215Sbostic start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks]; 32351215Sbostic 32451215Sbostic s = splbio(); 32551215Sbostic for (bp = vp->v_dirtyblkhd; bp; bp = nbp) { 32651215Sbostic nbp = bp->b_blockf; 32751915Sbostic /* 32851915Sbostic * XXX 32951915Sbostic * Should probably sleep on any BUSY buffer if 33051915Sbostic * doing an fsync? 33151915Sbostic */ 33251342Sbostic if (bp->b_flags & B_BUSY) 33351215Sbostic continue; 33451342Sbostic #ifdef DIAGNOSTIC 33551860Sbostic if (!(bp->b_flags & B_DELWRI)) 33651915Sbostic panic("lfs_gather: bp not B_DELWRI"); 33751860Sbostic if (!(bp->b_flags & B_LOCKED)) 33851915Sbostic panic("lfs_gather: bp not B_LOCKED"); 33951342Sbostic #endif 34051860Sbostic if (!match(fs, bp)) 34151215Sbostic continue; 34251342Sbostic 34351342Sbostic /* Insert into the buffer list, update the FINFO block. */ 34451342Sbostic *sp->cbpp++ = bp; 34551342Sbostic ++fip->fi_nblocks; 34651215Sbostic *lbp++ = bp->b_lblkno; 34751342Sbostic 34851860Sbostic /* 34951860Sbostic * If full, finish this segment. We may be doing I/O, so 35051860Sbostic * release and reacquire the splbio(). 35151860Sbostic */ 35251215Sbostic sp->sum_bytes_left -= sizeof(daddr_t); 35351215Sbostic sp->seg_bytes_left -= bp->b_bufsize; 35451342Sbostic if (sp->sum_bytes_left < sizeof(daddr_t) || 35551215Sbostic sp->seg_bytes_left < fs->lfs_bsize) { 35651215Sbostic splx(s); 35751342Sbostic lfs_updatemeta(fs, 35851860Sbostic sp, vp, start_lbp, bpp, lbp - start_lbp); 35951215Sbostic 36051342Sbostic /* Add the current file to the segment summary. */ 36151342Sbostic ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 36251215Sbostic 36351342Sbostic version = fip->fi_version; 36451860Sbostic lfs_writeseg(fs, sp); 36551915Sbostic lfs_initseg(fs, sp); 36651342Sbostic 36751215Sbostic fip = sp->fip; 36851342Sbostic fip->fi_version = version; 36951215Sbostic fip->fi_ino = ip->i_number; 37051342Sbostic start_lbp = lbp = fip->fi_blocks; 37151342Sbostic 37251215Sbostic bpp = sp->cbpp; 37351215Sbostic s = splbio(); 37451215Sbostic } 37551188Sbostic } 37651215Sbostic splx(s); 37751860Sbostic lfs_updatemeta(fs, sp, vp, start_lbp, bpp, lbp - start_lbp); 37851188Sbostic } 37951188Sbostic 38051342Sbostic /* 38151342Sbostic * Update the metadata that points to the blocks listed in the FINFO 38251188Sbostic * array. 38351188Sbostic */ 38451215Sbostic static void 38551860Sbostic lfs_updatemeta(fs, sp, vp, lbp, bpp, nblocks) 38651499Sbostic struct lfs *fs; 38751215Sbostic SEGMENT *sp; 38851860Sbostic VNODE *vp; 38951215Sbostic daddr_t *lbp; 39051188Sbostic BUF **bpp; 39151215Sbostic int nblocks; 39251188Sbostic { 39351915Sbostic SEGUSE *sup; 39451915Sbostic BUF *bp; 39551860Sbostic INDIR a[NIADDR], *ap; 39651860Sbostic INODE *ip; 39751915Sbostic daddr_t daddr, lbn, off; 39851860Sbostic int db_per_fsb, error, i, num; 39951188Sbostic 40051860Sbostic #ifdef VERBOSE 40151860Sbostic printf("lfs_updatemeta\n"); 40251860Sbostic #endif 40351342Sbostic if (nblocks == 0) 40451215Sbostic return; 40551215Sbostic 40651915Sbostic /* Sort the blocks. */ 40751215Sbostic shellsort(bpp, lbp, nblocks); 40851215Sbostic 40951915Sbostic /* 41051915Sbostic * Assign disk addresses, and update references to the logical 41151915Sbostic * block and the segment usage information. 41251915Sbostic */ 41351860Sbostic db_per_fsb = fsbtodb(fs, 1); 41451915Sbostic for (i = nblocks; i--; ++bpp) { 41551915Sbostic lbn = *lbp++; 41651915Sbostic (*bpp)->b_blkno = off = fs->lfs_offset; 41751860Sbostic fs->lfs_offset += db_per_fsb; 41851215Sbostic 41951860Sbostic if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num)) 42051915Sbostic panic("lfs_updatemeta: lfs_bmaparray returned %d", 42151915Sbostic error); 42251860Sbostic #ifdef META 42351860Sbostic printf("daddr: %d num: %d\n", daddr, num); 42451860Sbostic if (num != 0) { 42551860Sbostic int x; 42651915Sbostic printf("array from bmaparray:\n"); 42751860Sbostic for (x = 0; x < num; x++) 42851860Sbostic printf("\tlbn %d off %d\n", a[x].in_lbn, a[x].in_off); 42951860Sbostic } 43051860Sbostic #endif 43151860Sbostic ip = VTOI(vp); 43251860Sbostic switch (num) { 43351860Sbostic case 0: 43451356Sbostic #ifdef META 43551860Sbostic printf("update inode for direct block %d\n", lbn); 43651356Sbostic #endif 43751915Sbostic ip->i_db[lbn] = off; 43851860Sbostic break; 43951860Sbostic case 1: 44051915Sbostic ip->i_ib[a[0].in_off] = off; 44151860Sbostic break; 44251860Sbostic default: 44351860Sbostic ap = &a[num - 1]; 44451356Sbostic #ifdef META 44551860Sbostic printf("update indirect block %d offset %d\n", 44651860Sbostic ap->in_lbn, ap->in_off); 44751356Sbostic #endif 44851860Sbostic if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp)) 44951860Sbostic panic("lfs_updatemeta: bread bno %d", 45051860Sbostic ap->in_lbn); 45151915Sbostic bp->b_un.b_daddr[ap->in_off] = off; 45251342Sbostic lfs_bwrite(bp); 45351188Sbostic } 45451915Sbostic 45551915Sbostic /* Update segment usage information. */ 45651915Sbostic if (daddr != UNASSIGNED) { 45751915Sbostic LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 45851915Sbostic sup->su_lastmod = time.tv_sec; 45951915Sbostic #ifdef DIAGNOSTIC 46051915Sbostic if (sup->su_nbytes < fs->lfs_bsize) 46151915Sbostic panic("lfs: negative bytes (segment %d)\n", 46251915Sbostic datosn(fs, daddr)); 46351915Sbostic #endif 46451915Sbostic sup->su_nbytes -= fs->lfs_bsize; 46551915Sbostic lfs_bwrite(bp); 46651915Sbostic } 46751188Sbostic } 46851188Sbostic } 46951188Sbostic 47051915Sbostic /* 47151915Sbostic * Start a new segment. 47251915Sbostic */ 47351860Sbostic static void 47451915Sbostic lfs_initseg(fs, sp) 47551499Sbostic struct lfs *fs; 47651215Sbostic SEGMENT *sp; 47751188Sbostic { 47851915Sbostic SEGUSE *sup; 47951915Sbostic SEGSUM *ssp; 48051915Sbostic struct buf *bp; 48151915Sbostic daddr_t lbn, *lbnp; 48251215Sbostic 48351860Sbostic #ifdef VERBOSE 48451915Sbostic printf("lfs_initseg\n"); 48551860Sbostic #endif 48651915Sbostic /* Advance to the next segment. */ 487*51927Sbostic if (!LFS_PARTIAL_FITS(fs)) { 488*51927Sbostic lfs_newseg(fs); 489*51927Sbostic fs->lfs_offset = fs->lfs_curseg; 49051915Sbostic sp->seg_number = datosn(fs, fs->lfs_curseg); 49151915Sbostic sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE; 49251915Sbostic 49351915Sbostic /* 494*51927Sbostic * If the segment contains a superblock, update the offset 495*51927Sbostic * and summary address to skip over it. 49651915Sbostic */ 49751915Sbostic LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 498*51927Sbostic if (sup->su_flags & SEGUSE_SUPERBLOCK) { 49951915Sbostic fs->lfs_offset += LFS_SBPAD / DEV_BSIZE; 50051915Sbostic sp->seg_bytes_left -= LFS_SBPAD; 50151215Sbostic } 50251915Sbostic brelse(bp); 50351915Sbostic } else { 50451915Sbostic sp->seg_number = datosn(fs, fs->lfs_curseg); 50551915Sbostic sp->seg_bytes_left = (fs->lfs_dbpseg - 50651915Sbostic (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE; 50751915Sbostic } 50851342Sbostic 50951915Sbostic sp->ibp = NULL; 51051915Sbostic sp->ninodes = 0; 51151342Sbostic 51251915Sbostic /* Get a new buffer for SEGSUM and enter it into the buffer list. */ 51351915Sbostic sp->cbpp = sp->bpp; 51451915Sbostic *sp->cbpp = lfs_newbuf(fs, sp, fs->lfs_offset, LFS_SUMMARY_SIZE); 51551915Sbostic sp->segsum = (*sp->cbpp)->b_un.b_addr; 51651915Sbostic ++sp->cbpp; 51751915Sbostic fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE; 51851342Sbostic 51951915Sbostic /* Set point to SEGSUM, initialize it. */ 52051915Sbostic ssp = sp->segsum; 52151915Sbostic ssp->ss_next = fs->lfs_nextseg; 52251915Sbostic ssp->ss_create = time.tv_sec; 52351915Sbostic ssp->ss_nfinfo = ssp->ss_ninos = 0; 52451342Sbostic 52551915Sbostic /* Set pointer to first FINFO, initialize it. */ 52651915Sbostic sp->fip = (FINFO *)(sp->segsum + sizeof(SEGSUM)); 52751915Sbostic sp->fip->fi_nblocks = 0; 52851342Sbostic 52951915Sbostic sp->seg_bytes_left -= LFS_SUMMARY_SIZE; 53051915Sbostic sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM); 53151915Sbostic } 53251342Sbostic 53351915Sbostic /* 53451915Sbostic * Return the next segment to write. 53551915Sbostic */ 536*51927Sbostic static void 53751915Sbostic lfs_newseg(fs) 53851915Sbostic struct lfs *fs; 53951915Sbostic { 540*51927Sbostic CLEANERINFO *cip; 54151915Sbostic SEGUSE *sup; 54251915Sbostic struct buf *bp; 543*51927Sbostic int curseg, isdirty, sn; 54451915Sbostic 54551915Sbostic #ifdef VERBOSE 54651915Sbostic printf("lfs_newseg\n"); 54751915Sbostic #endif 548*51927Sbostic /* 549*51927Sbostic * Turn off the active bit for the current segment, turn on the 550*51927Sbostic * active and dirty bits for the next segment, update the cleaner 551*51927Sbostic * info. Set the current segment to the next segment, get a new 552*51927Sbostic * next segment. 553*51927Sbostic */ 554*51927Sbostic LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_curseg), bp); 555*51927Sbostic sup->su_flags &= ~SEGUSE_ACTIVE; 55651915Sbostic lfs_bwrite(bp); 557*51927Sbostic 558*51927Sbostic LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp); 559*51927Sbostic sup->su_flags |= SEGUSE_ACTIVE | SEGUSE_DIRTY; 560*51927Sbostic lfs_bwrite(bp); 561*51927Sbostic 562*51927Sbostic LFS_CLEANERINFO(cip, fs, bp); 563*51927Sbostic --cip->clean; 564*51927Sbostic ++cip->dirty; 565*51927Sbostic lfs_bwrite(bp); 566*51927Sbostic 567*51927Sbostic fs->lfs_lastseg = fs->lfs_curseg; 568*51927Sbostic fs->lfs_curseg = fs->lfs_nextseg; 569*51927Sbostic for (sn = curseg = datosn(fs, fs->lfs_curseg);;) { 57051915Sbostic sn = (sn + 1) % fs->lfs_nseg; 571*51927Sbostic if (sn == curseg) 57251915Sbostic panic("lfs_nextseg: no clean segments"); 57351915Sbostic LFS_SEGENTRY(sup, fs, sn, bp); 57451915Sbostic isdirty = sup->su_flags & SEGUSE_DIRTY; 57551915Sbostic brelse(bp); 57651915Sbostic if (!isdirty) 57751915Sbostic break; 57851915Sbostic } 579*51927Sbostic fs->lfs_nextseg = sntoda(fs, sn); 58051188Sbostic } 58151188Sbostic 58251188Sbostic static void 58351188Sbostic lfs_writeseg(fs, sp) 58451499Sbostic struct lfs *fs; 58551188Sbostic SEGMENT *sp; 58651188Sbostic { 58751915Sbostic BUF **bpp, *bp; 58851188Sbostic SEGUSE *sup; 58951860Sbostic SEGSUM *segp; 59051860Sbostic dev_t i_dev; 59151860Sbostic u_long *datap, *dp; 59251342Sbostic void *pmeta; 593*51927Sbostic int flags, i, nblocks, s, (*strategy)__P((BUF *)); 59451188Sbostic 59551860Sbostic #ifdef VERBOSE 59651860Sbostic printf("lfs_writeseg\n"); 59751860Sbostic #endif 59851188Sbostic /* 59951915Sbostic * Compute checksum across data and then across summary; 60051915Sbostic * the first block (the summary block) is skipped. 60151860Sbostic * 60251860Sbostic * XXX 60351860Sbostic * Fix this to do it inline, instead of malloc/copy. 60451188Sbostic */ 605*51927Sbostic nblocks = sp->cbpp - sp->bpp; 60651860Sbostic datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK); 60751915Sbostic for (bpp = sp->bpp, i = nblocks - 1; i--;) 60851915Sbostic *dp++ = (*++bpp)->b_un.b_words[0]; 60951860Sbostic 61051860Sbostic segp = (SEGSUM *)sp->segsum; 61151860Sbostic segp->ss_datasum = cksum(datap, nblocks * sizeof(u_long)); 61251860Sbostic segp->ss_sumsum = cksum(&segp->ss_datasum, 61351860Sbostic LFS_SUMMARY_SIZE - sizeof(segp->ss_sumsum)); 614*51927Sbostic free(datap, M_SEGMENT); 61551188Sbostic 61651188Sbostic /* 61751860Sbostic * When we gathered the blocks for I/O we did not mark them busy or 61851860Sbostic * remove them from the freelist. As we do this, turn off the B_LOCKED 61951860Sbostic * bit so the future brelse will put them on the LRU list, and add the 62051860Sbostic * B_CALL flags if we're doing a checkpoint so we can count I/O's. LFS 62151860Sbostic * requires that the super blocks (on checkpoint) be written after all 62251860Sbostic * the segment data. 62351188Sbostic */ 62451860Sbostic i_dev = VTOI(fs->lfs_ivnode)->i_dev; 62551860Sbostic strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy; 62651301Sbostic 62751301Sbostic s = splbio(); 62851860Sbostic if (sp->seg_flags & SEGM_CKP) { 62951860Sbostic fs->lfs_iocount += nblocks; 63051915Sbostic flags = B_ASYNC | B_BUSY | B_CALL; 63151860Sbostic } else 63251915Sbostic flags = B_ASYNC | B_BUSY; 63351860Sbostic for (bpp = sp->bpp, i = nblocks; i--;) { 63451860Sbostic bp = *bpp++; 63551860Sbostic bp->b_flags |= flags; 63651915Sbostic bp->b_flags &= 63751915Sbostic ~(B_DONE | B_ERROR | B_READ | B_DELWRI | B_LOCKED); 63851860Sbostic bp->b_dev = i_dev; 63951860Sbostic bp->b_iodone = lfs_callback; 64051860Sbostic if (!(bp->b_flags & B_NOCACHE)) { 64151860Sbostic bremfree(bp); 64251860Sbostic reassignbuf(bp, bp->b_vp); 64351860Sbostic } 64451860Sbostic } 64551301Sbostic splx(s); 64651860Sbostic 64751860Sbostic for (bpp = sp->bpp, i = nblocks; i--;) 64851860Sbostic (strategy)(*bpp++); 649*51927Sbostic 650*51927Sbostic /* Update the segment usage information. */ 651*51927Sbostic LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 652*51927Sbostic sup->su_nbytes += LFS_SUMMARY_SIZE + (nblocks - 1 << fs->lfs_bshift); 653*51927Sbostic sup->su_lastmod = time.tv_sec; 654*51927Sbostic lfs_bwrite(bp); 65551188Sbostic } 65651188Sbostic 65751215Sbostic static void 65851860Sbostic lfs_writesuper(fs, sp) 65951499Sbostic struct lfs *fs; 66051860Sbostic SEGMENT *sp; 66151301Sbostic { 66251301Sbostic BUF *bp; 66351860Sbostic dev_t i_dev; 66451342Sbostic int (*strategy) __P((BUF *)); 66551301Sbostic 66651860Sbostic #ifdef VERBOSE 66751860Sbostic printf("lfs_writesuper\n"); 66851860Sbostic #endif 66951860Sbostic i_dev = VTOI(fs->lfs_ivnode)->i_dev; 67051860Sbostic strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy; 67151356Sbostic 67251342Sbostic /* Checksum the superblock and copy it into a buffer. */ 67351499Sbostic fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum)); 67451860Sbostic bp = lfs_newbuf(fs, sp, fs->lfs_sboffs[0], LFS_SBPAD); 67551860Sbostic *bp->b_un.b_lfs = *fs; 67651215Sbostic 67751356Sbostic /* Write the first superblock (wait). */ 67851860Sbostic bp->b_dev = i_dev; 67951915Sbostic bp->b_flags |= B_BUSY; 68051860Sbostic bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); 68151342Sbostic (strategy)(bp); 68251215Sbostic biowait(bp); 68351342Sbostic 68451356Sbostic /* Write the second superblock (don't wait). */ 68551215Sbostic bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1]; 68651915Sbostic bp->b_flags |= B_ASYNC | B_BUSY; 68751860Sbostic bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); 68851342Sbostic (strategy)(bp); 68951215Sbostic } 69051215Sbostic 69151342Sbostic /* 69251342Sbostic * Logical block number match routines used when traversing the dirty block 69351342Sbostic * chain. 69451342Sbostic */ 69551490Sbostic static int 69651860Sbostic match_data(fs, bp) 69751860Sbostic struct lfs *fs; 69851215Sbostic BUF *bp; 69951215Sbostic { 70051342Sbostic return (bp->b_lblkno >= 0); 70151215Sbostic } 70251215Sbostic 70351490Sbostic static int 70451860Sbostic match_indir(fs, bp) 70551860Sbostic struct lfs *fs; 70651215Sbostic BUF *bp; 70751215Sbostic { 70851860Sbostic int lbn; 70951860Sbostic 71051860Sbostic lbn = bp->b_lblkno; 71151860Sbostic return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0); 71251215Sbostic } 71351215Sbostic 71451490Sbostic static int 71551860Sbostic match_dindir(fs, bp) 71651860Sbostic struct lfs *fs; 71751215Sbostic BUF *bp; 71851215Sbostic { 71951860Sbostic int lbn; 72051860Sbostic 72151860Sbostic lbn = bp->b_lblkno; 72251860Sbostic return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1); 72351215Sbostic } 72451215Sbostic 72551860Sbostic static int 72651860Sbostic match_tindir(fs, bp) 72751499Sbostic struct lfs *fs; 72851860Sbostic BUF *bp; 72951342Sbostic { 73051860Sbostic int lbn; 73151342Sbostic 73251860Sbostic lbn = bp->b_lblkno; 73351860Sbostic return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2); 73451860Sbostic } 73551342Sbostic 73651860Sbostic /* 73751860Sbostic * Allocate a new buffer header. 73851860Sbostic */ 73951860Sbostic static BUF * 74051860Sbostic lfs_newbuf(fs, sp, daddr, size) 74151860Sbostic struct lfs *fs; 74251860Sbostic SEGMENT *sp; 74351860Sbostic daddr_t daddr; 74451860Sbostic size_t size; 74551860Sbostic { 74651860Sbostic BUF *bp; 74751342Sbostic 74851860Sbostic #ifdef VERBOSE 74951860Sbostic printf("lfs_newbuf\n"); 75051860Sbostic #endif 75151860Sbostic bp = getnewbuf(); 75251860Sbostic bremhash(bp); 75351860Sbostic bgetvp(fs->lfs_ivnode, bp); 75451860Sbostic bp->b_bcount = 0; 75551860Sbostic bp->b_lblkno = daddr; 75651860Sbostic bp->b_blkno = daddr; 75751860Sbostic bp->b_error = 0; 75851860Sbostic bp->b_resid = 0; 75951860Sbostic allocbuf(bp, size); 76051860Sbostic bp->b_flags |= B_NOCACHE; 76151915Sbostic binshash(bp, &bfreelist[BQ_AGE]); 76251860Sbostic return (bp); 76351860Sbostic } 76451342Sbostic 76551860Sbostic /* 76651860Sbostic * The buffer cache callback routine. 76751860Sbostic */ 76851860Sbostic static int /* XXX should be void */ 76951860Sbostic lfs_callback(bp) 77051860Sbostic BUF *bp; 77151860Sbostic { 77251860Sbostic struct lfs *fs; 77351342Sbostic 77451860Sbostic fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs; 77551860Sbostic #ifdef DIAGNOSTIC 77651860Sbostic if (fs->lfs_iocount == 0) 77751860Sbostic panic("lfs_callback: zero iocount\n"); 77851860Sbostic #endif 77951860Sbostic if (--fs->lfs_iocount == 0) 78051860Sbostic wakeup(&fs->lfs_iocount); 78151915Sbostic 78251860Sbostic brelse(bp); 78351860Sbostic } 78451342Sbostic 78551215Sbostic /* 78651188Sbostic * Shellsort (diminishing increment sort) from Data Structures and 78751188Sbostic * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290; 78851188Sbostic * see also Knuth Vol. 3, page 84. The increments are selected from 78951188Sbostic * formula (8), page 95. Roughly O(N^3/2). 79051188Sbostic */ 79151188Sbostic /* 79251188Sbostic * This is our own private copy of shellsort because we want to sort 79351188Sbostic * two parallel arrays (the array of buffer pointers and the array of 79451188Sbostic * logical block numbers) simultaneously. Note that we cast the array 79551188Sbostic * of logical block numbers to a unsigned in this routine so that the 79651188Sbostic * negative block numbers (meta data blocks) sort AFTER the data blocks. 79751188Sbostic */ 79851188Sbostic static void 79951188Sbostic shellsort(bp_array, lb_array, nmemb) 80051188Sbostic BUF **bp_array; 80151215Sbostic daddr_t *lb_array; 80251188Sbostic register int nmemb; 80351188Sbostic { 80451188Sbostic static int __rsshell_increments[] = { 4, 1, 0 }; 80551188Sbostic register int incr, *incrp, t1, t2; 80651188Sbostic BUF *bp_temp; 80751188Sbostic u_long lb_temp; 80851188Sbostic 80951188Sbostic for (incrp = __rsshell_increments; incr = *incrp++;) 81051188Sbostic for (t1 = incr; t1 < nmemb; ++t1) 81151188Sbostic for (t2 = t1 - incr; t2 >= 0;) 81251188Sbostic if (lb_array[t2] > lb_array[t2 + incr]) { 81351188Sbostic lb_temp = lb_array[t2]; 81451188Sbostic lb_array[t2] = lb_array[t2 + incr]; 81551188Sbostic lb_array[t2 + incr] = lb_temp; 81651188Sbostic bp_temp = bp_array[t2]; 81751188Sbostic bp_array[t2] = bp_array[t2 + incr]; 81851188Sbostic bp_array[t2 + incr] = bp_temp; 81951188Sbostic t2 -= incr; 82051188Sbostic } else 82151188Sbostic break; 82251188Sbostic } 823