151188Sbostic /* 251188Sbostic * Copyright (c) 1991 Regents of the University of California. 351188Sbostic * All rights reserved. 451188Sbostic * 551188Sbostic * %sccs.include.redist.c% 651188Sbostic * 7*51301Sbostic * @(#)lfs_segment.c 5.3 (Berkeley) 10/03/91 851188Sbostic */ 951188Sbostic 1051215Sbostic #ifdef LOGFS 1151188Sbostic #include "param.h" 1251188Sbostic #include "systm.h" 1351188Sbostic #include "namei.h" 1451188Sbostic #include "resourcevar.h" 1551188Sbostic #include "kernel.h" 1651188Sbostic #include "file.h" 1751188Sbostic #include "stat.h" 1851188Sbostic #include "buf.h" 1951188Sbostic #include "proc.h" 2051188Sbostic #include "conf.h" 2151188Sbostic #include "vnode.h" 2251188Sbostic #include "specdev.h" 2351188Sbostic #include "fifo.h" 2451188Sbostic #include "malloc.h" 2551188Sbostic #include "mount.h" 2651188Sbostic #include "../ufs/lockf.h" 2751188Sbostic #include "../ufs/quota.h" 2851188Sbostic #include "../ufs/inode.h" 2951188Sbostic #include "../ufs/dir.h" 3051188Sbostic #include "../ufs/ufsmount.h" 3151188Sbostic #include "lfs.h" 3251188Sbostic #include "lfs_extern.h" 3351188Sbostic 3451188Sbostic /* 35*51301Sbostic * Add a check so that if the segment is empty, you don't write it. 36*51301Sbostic * 37*51301Sbostic * Change lfs_ialloc to allocate a new page of inodes if you have to. 38*51301Sbostic * 39*51301Sbostic * Need to keep vnode v_numoutput up to date for pending writes? Could 40*51301Sbostic * actually fire off the datablock writes before you finish. This would give 41*51301Sbostic * them a chance to get started earlier. 42*51301Sbostic */ 4351188Sbostic 4451188Sbostic static int lfs_biocallback __P((BUF *)); 4551188Sbostic static void lfs_endsum __P((LFS *, SEGMENT *, int)); 4651215Sbostic static SEGMENT *lfs_gather 4751215Sbostic __P((LFS *, SEGMENT *, VNODE *, int (*) __P((BUF *)))); 4851188Sbostic static BUF *lfs_newbuf __P((LFS *, daddr_t, size_t)); 4951188Sbostic static SEGMENT *lfs_newseg __P((LFS *)); 5051188Sbostic static void lfs_newsum __P((LFS *, SEGMENT *)); 5151188Sbostic static daddr_t lfs_nextseg __P((LFS *)); 5251215Sbostic static void lfs_updatemeta __P((LFS *, SEGMENT *, INODE *, daddr_t *, 5351215Sbostic BUF **, int)); 54*51301Sbostic static SEGMENT *lfs_writeckp __P((LFS *, SEGMENT *)); 5551215Sbostic static SEGMENT *lfs_writefile __P((SEGMENT *, LFS *, VNODE *, int)); 5651215Sbostic static SEGMENT *lfs_writeinode __P((LFS *, SEGMENT *, VNODE *)); 5751188Sbostic static void lfs_writeseg __P((LFS *, SEGMENT *)); 58*51301Sbostic static void lfs_writesum __P((LFS *)); 5951215Sbostic static void lfs_writesuper __P((LFS *, SEGMENT *)); 6051215Sbostic static int match_data __P((BUF *)); 6151215Sbostic static int match_dindir __P((BUF *)); 6251215Sbostic static int match_indir __P((BUF *)); 6351215Sbostic static void shellsort __P((BUF **, daddr_t *, register int)); 6451188Sbostic 6551188Sbostic /* 6651188Sbostic * XXX -- when we add fragments in here, we will need to allocate a larger 6751188Sbostic * buffer pointer array (sp->bpp). 6851188Sbostic */ 6951188Sbostic int 7051215Sbostic lfs_segwrite(mp, do_ckp) 7151188Sbostic MOUNT *mp; 7251215Sbostic int do_ckp; /* do a checkpoint too */ 7351188Sbostic { 7451188Sbostic FINFO *fip; /* current file info structure */ 7551188Sbostic INODE *ip; 7651188Sbostic LFS *fs; 7751188Sbostic VNODE *vp; 7851188Sbostic SEGMENT *sp; 79*51301Sbostic int s; 8051188Sbostic 8151188Sbostic fs = VFSTOUFS(mp)->um_lfs; 82*51301Sbostic /* 83*51301Sbostic * LFS requires that the summary blocks be written after the rest of 84*51301Sbostic * the segment, and that the super blocks (on checkpoint) be written 85*51301Sbostic * last of all. We keep a cumulative count of the outstanding blocks 86*51301Sbostic * from all of the segments, and write these blocks when this count 87*51301Sbostic * goes to zero. If the disk drive catches up with us it could go 88*51301Sbostic * to zero before we finish, so we artificially increment it by one 89*51301Sbostic * until we've scheduled all of the writes we intend to do. At the 90*51301Sbostic * moment, the user's process hangs around so we can sleep; this should 91*51301Sbostic * probably be redone using a kernel thread. 92*51301Sbostic */ 93*51301Sbostic s = splbio(); 94*51301Sbostic fs->lfs_iocount = 1; 95*51301Sbostic splx(s); 96*51301Sbostic 9751188Sbostic sp = lfs_newseg(fs); 9851188Sbostic loop: 9951188Sbostic for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { 10051188Sbostic /* 10151188Sbostic * If the vnode that we are about to sync is no longer 10251188Sbostic * associated with this mount point, start over. 10351188Sbostic */ 10451188Sbostic if (vp->v_mount != mp) 10551188Sbostic goto loop; 10651188Sbostic if (VOP_ISLOCKED(vp)) 10751188Sbostic continue; 10851188Sbostic ip = VTOI(vp); 10951188Sbostic if (ip->i_number == LFS_IFILE_INUM) 11051188Sbostic continue; 11151188Sbostic if ((ip->i_flag & (IMOD|IACC|IUPD|ICHG)) == 0 && 11251188Sbostic vp->v_dirtyblkhd == NULL) 11351188Sbostic continue; 11451188Sbostic if (vget(vp)) 11551188Sbostic goto loop; 11651215Sbostic sp = lfs_writefile(sp, fs, vp, do_ckp); 11751188Sbostic vput(vp); 11851188Sbostic } 119*51301Sbostic 12051215Sbostic if (do_ckp) 121*51301Sbostic sp = lfs_writeckp(fs, sp); 12251215Sbostic else 12351215Sbostic lfs_writeseg(fs, sp); 124*51301Sbostic s = splbio(); 125*51301Sbostic if (--fs->lfs_iocount) 126*51301Sbostic sleep(&fs->lfs_iocount, PRIBIO + 1); 127*51301Sbostic splx(s); 128*51301Sbostic lfs_writesum(fs); 129*51301Sbostic if (do_ckp) 130*51301Sbostic lfs_writesuper(fs, sp); 13151188Sbostic return (0); 13251188Sbostic } 13351188Sbostic 13451188Sbostic static int 13551188Sbostic lfs_biocallback(bp) 13651188Sbostic BUF *bp; 13751188Sbostic { 13851188Sbostic LFS *fs; 13951188Sbostic SEGMENT *sp, *next_sp; 14051188Sbostic VNODE *devvp; 14151188Sbostic 14251215Sbostic /* 143*51301Sbostic * XXX 144*51301Sbostic * Reset the flags (probably wrong). If the contents of the buffer 145*51301Sbostic * are valid, move back onto the clean list. 14651215Sbostic */ 147*51301Sbostic bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); 148*51301Sbostic fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs; 14951215Sbostic if (bp->b_flags & B_NOCACHE) 15051215Sbostic bp->b_vp = NULL; 151*51301Sbostic else 15251215Sbostic reassignbuf(bp, bp->b_vp); 153*51301Sbostic brelse(bp); 15451215Sbostic 155*51301Sbostic printf("callback: buffer: %x iocount %d\n", bp, fs->lfs_iocount); 156*51301Sbostic if (fs->lfs_iocount == 0) 157*51301Sbostic panic("lfs_biocallback: zero iocount\n"); 15851215Sbostic 159*51301Sbostic if (--fs->lfs_iocount == 0) 160*51301Sbostic wakeup(&fs->lfs_iocount); 16151188Sbostic } 16251188Sbostic 16351188Sbostic static void 16451188Sbostic lfs_endsum(fs, sp, calc_next) 16551188Sbostic LFS *fs; 16651188Sbostic SEGMENT *sp; 167*51301Sbostic int calc_next; /* If 1 calculate next, else set to -1. */ 16851188Sbostic { 16951188Sbostic BUF *bp; 17051188Sbostic SEGSUM *ssp; 17151188Sbostic daddr_t next_addr; 17251215Sbostic int npages, nseg_pages, nsums_per_blk; 17351188Sbostic 17451215Sbostic if (sp->sbp == NULL) 17551215Sbostic return; 17651215Sbostic 17751188Sbostic ssp = sp->segsum; 178*51301Sbostic ssp->ss_nextsum = calc_next ? 179*51301Sbostic sp->sum_addr - LFS_SUMMARY_SIZE / DEV_BSIZE : (daddr_t)-1; 18051188Sbostic 181*51301Sbostic /* 182*51301Sbostic * XXX 183*51301Sbostic * I don't understand how sum_num works; the 0 base seems wrong. 184*51301Sbostic */ 185*51301Sbostic nsums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE; 186*51301Sbostic if ((sp->sum_num % (fs->lfs_bsize / LFS_SUMMARY_SIZE)) == 187*51301Sbostic (nsums_per_blk - 1)) { 18851188Sbostic /* 189*51301Sbostic * This buffer is now full. Compute the next address if 190*51301Sbostic * appropriate and the checksum, and close the buffer by 191*51301Sbostic * setting sp->sbp NULL. 19251188Sbostic */ 19351215Sbostic if (calc_next) { 19451215Sbostic nsums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE; 19551215Sbostic nseg_pages = 1 + sp->sum_num / nsums_per_blk; 196*51301Sbostic npages = nseg_pages + 197*51301Sbostic (sp->ninodes + INOPB(fs) - 1) / INOPB(fs); 198*51301Sbostic next_addr = fs->lfs_sboffs[0] + (sp->seg_number + 1) * 199*51301Sbostic fsbtodb(fs, fs->lfs_ssize) - 200*51301Sbostic fsbtodb(fs, (npages - 1)) - 201*51301Sbostic LFS_SUMMARY_SIZE / DEV_BSIZE; 20251188Sbostic ssp->ss_nextsum = next_addr; 20351188Sbostic } 204*51301Sbostic ssp->ss_cksum = cksum(&ssp->ss_cksum, 205*51301Sbostic LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum)); 20651215Sbostic sp->sbp = NULL; 20751215Sbostic } else 20851188Sbostic /* Calculate cksum on previous segment summary */ 20951188Sbostic ssp->ss_cksum = cksum(&ssp->ss_cksum, 21051188Sbostic LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum)); 21151215Sbostic } 21251215Sbostic 21351215Sbostic static SEGMENT * 21451215Sbostic lfs_gather(fs, sp, vp, match) 21551215Sbostic LFS *fs; 21651215Sbostic SEGMENT *sp; 21751215Sbostic VNODE *vp; 21851215Sbostic int (*match) __P((BUF *)); 21951215Sbostic { 22051215Sbostic BUF **bpp, *bp, *nbp; 22151215Sbostic FINFO *fip; 22251215Sbostic INODE *ip; 22351215Sbostic int count, s, version; 22451215Sbostic daddr_t *lbp, *start_lbp; 22551215Sbostic 22651215Sbostic ip = VTOI(vp); 22751215Sbostic bpp = sp->cbpp; 22851215Sbostic fip = sp->fip; 22951215Sbostic version = fip->fi_version; 23051215Sbostic start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks]; 23151215Sbostic count = 0; 23251215Sbostic 23351215Sbostic s = splbio(); 23451215Sbostic for (bp = vp->v_dirtyblkhd; bp; bp = nbp) { 23551215Sbostic nbp = bp->b_blockf; 23651215Sbostic if ((bp->b_flags & B_BUSY)) 23751215Sbostic continue; 23851215Sbostic if ((bp->b_flags & B_DELWRI) == 0) 23951215Sbostic panic("lfs_write: not dirty"); 24051215Sbostic if (!match(bp)) 24151215Sbostic continue; 24251215Sbostic bremfree(bp); 24351215Sbostic bp->b_flags |= B_BUSY | B_CALL; 244*51301Sbostic bp->b_iodone = lfs_biocallback; 24551215Sbostic bp->b_dev = VTOI(fs->lfs_ivnode)->i_dev; 24651215Sbostic 24751215Sbostic *lbp++ = bp->b_lblkno; 24851215Sbostic *sp->cbpp++ = bp; 24951215Sbostic fip->fi_nblocks++; 25051215Sbostic sp->sum_bytes_left -= sizeof(daddr_t); 25151215Sbostic sp->seg_bytes_left -= bp->b_bufsize; 25251215Sbostic if (sp->sum_bytes_left < sizeof(daddr_t) || 25351215Sbostic sp->seg_bytes_left < fs->lfs_bsize) { 25451215Sbostic /* 25551215Sbostic * We are about to allocate a new summary block 25651215Sbostic * and possibly a new segment. So, we need to 25751215Sbostic * sort the blocks we've done so far, and assign 25851215Sbostic * the disk addresses, so we can start a new block 25951215Sbostic * correctly. We may be doing I/O so we need to 26051215Sbostic * release the s lock before doing anything. 26151215Sbostic */ 26251215Sbostic splx(s); 26351215Sbostic lfs_updatemeta(fs, sp, ip, start_lbp, bpp, 26451215Sbostic lbp - start_lbp); 26551215Sbostic 26651215Sbostic /* Put this file in the segment summary */ 26751215Sbostic ((SEGSUM *)(sp->segsum))->ss_nfinfo++; 26851215Sbostic 26951215Sbostic if (sp->seg_bytes_left < fs->lfs_bsize) { 27051215Sbostic lfs_writeseg(fs, sp); 27151215Sbostic sp = lfs_newseg(fs); 27251215Sbostic } else if (sp->sum_bytes_left < sizeof(daddr_t)) 27351215Sbostic lfs_newsum(fs, sp); 27451215Sbostic fip = sp->fip; 27551215Sbostic fip->fi_ino = ip->i_number; 27651215Sbostic fip->fi_version = version; 27751215Sbostic bpp = sp->cbpp; 278*51301Sbostic /* You know that you have a new FINFO either way. */ 27951215Sbostic start_lbp = lbp = fip->fi_blocks; 28051215Sbostic s = splbio(); 28151215Sbostic } 28251188Sbostic } 28351215Sbostic splx(s); 28451215Sbostic lfs_updatemeta(fs, sp, ip, start_lbp, bpp, lbp - start_lbp); 28551215Sbostic return(sp); 28651188Sbostic } 28751188Sbostic 28851188Sbostic static BUF * 28951188Sbostic lfs_newbuf(fs, daddr, size) 29051188Sbostic LFS *fs; 29151188Sbostic daddr_t daddr; 29251188Sbostic size_t size; 29351188Sbostic { 29451188Sbostic BUF *bp; 29551188Sbostic 29651188Sbostic bp = getnewbuf(); 29751188Sbostic bremhash(bp); 29851215Sbostic bp->b_vp = fs->lfs_ivnode; 29951188Sbostic bp->b_bcount = 0; 30051215Sbostic bp->b_blkno = bp->b_lblkno = daddr; 30151188Sbostic bp->b_error = 0; 30251188Sbostic bp->b_resid = 0; 303*51301Sbostic bp->b_flags |= B_DELWRI | B_NOCACHE; 30451215Sbostic bp->b_iodone = lfs_biocallback; 305*51301Sbostic bp->b_dev = VTOI(fs->lfs_ivnode)->i_dev; 30651188Sbostic allocbuf(bp, size); 30751188Sbostic return (bp); 30851188Sbostic } 30951188Sbostic 310*51301Sbostic /* Start a new segment. */ 31151188Sbostic static SEGMENT * 31251188Sbostic lfs_newseg(fs) 31351188Sbostic LFS *fs; 31451188Sbostic { 31551188Sbostic SEGMENT *sp; 31651188Sbostic SEGUSE *sup; 31751188Sbostic 318*51301Sbostic printf("lfs_newseg: new segment %x\n", fs->lfs_nextseg); 319*51301Sbostic 32051188Sbostic sp = malloc(sizeof(SEGMENT), M_SEGMENT, M_WAITOK); 321*51301Sbostic sp->nextp = NULL; 32251188Sbostic sp->cbpp = sp->bpp = 32351188Sbostic malloc(fs->lfs_ssize * sizeof(BUF *), M_SEGMENT, M_WAITOK); 324*51301Sbostic sp->ibp = sp->sbp = NULL; 32551188Sbostic sp->sum_bytes_left = LFS_SUMMARY_SIZE; 32651188Sbostic sp->seg_bytes_left = (fs->lfs_segmask + 1) - LFS_SUMMARY_SIZE; 32751188Sbostic sp->saddr = fs->lfs_nextseg; 32851188Sbostic sp->sum_addr = sp->saddr + sp->seg_bytes_left / DEV_BSIZE; 32951188Sbostic sp->ninodes = 0; 33051188Sbostic sp->sum_num = -1; 33151215Sbostic sp->seg_number = 33251215Sbostic (sp->saddr - fs->lfs_sboffs[0]) / fsbtodb(fs, fs->lfs_ssize); 33351188Sbostic 334*51301Sbostic /* Bump the segment count. */ 335*51301Sbostic fs->lfs_nextseg = lfs_nextseg(fs); 336*51301Sbostic lfs_newsum(fs, sp); /* Init sp->segsum. */ 337*51301Sbostic 33851188Sbostic sup = fs->lfs_segtab + sp->seg_number; 33951188Sbostic 34051188Sbostic if (sup->su_nbytes != 0) { 341*51301Sbostic /* This is a segment containing a super block. */ 34251188Sbostic FINFO *fip; 34351188Sbostic daddr_t lbn, *lbnp; 34451215Sbostic SEGSUM *ssp; 34551188Sbostic 34651215Sbostic ssp = (SEGSUM *)sp->segsum; 34751215Sbostic ssp->ss_nfinfo++; 34851188Sbostic fip = sp->fip; 34951188Sbostic fip->fi_nblocks = LFS_SBPAD >> fs->lfs_bshift; 35051188Sbostic fip->fi_version = 1; 35151188Sbostic fip->fi_ino = LFS_UNUSED_INUM; 35251188Sbostic sp->saddr += fsbtodb(fs, fip->fi_nblocks); 35351188Sbostic lbnp = fip->fi_blocks; 35451188Sbostic for (lbn = 0; lbn < fip->fi_nblocks; lbn++) 35551188Sbostic *lbnp++ = lbn; 35651188Sbostic sp->seg_bytes_left -= sup->su_nbytes; 35751188Sbostic sp->sum_bytes_left -= 35851188Sbostic sizeof(FINFO) + (fip->fi_nblocks - 1) * sizeof(daddr_t); 35951188Sbostic sp->fip = (FINFO *)lbnp; 36051188Sbostic } 36151188Sbostic return(sp); 36251188Sbostic } 36351188Sbostic 36451188Sbostic static void 36551188Sbostic lfs_newsum(fs, sp) 36651188Sbostic LFS *fs; 36751188Sbostic SEGMENT *sp; 36851188Sbostic { 36951188Sbostic SEGSUM *ssp; 37051215Sbostic int npages, nseg_pages, sums_per_blk; 37151188Sbostic 37251188Sbostic printf("lfs_newsum\n"); 37351215Sbostic lfs_endsum(fs, sp, 1); 37451215Sbostic ++sp->sum_num; 37551215Sbostic if (sp->sbp == NULL) { 37651215Sbostic /* Allocate a new buffer. */ 37751215Sbostic if (sp->seg_bytes_left < fs->lfs_bsize) { 37851215Sbostic lfs_writeseg(fs, sp); 37951215Sbostic sp = lfs_newseg(fs); 38051215Sbostic } 38151215Sbostic sums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE; 38251215Sbostic nseg_pages = 1 + sp->sum_num / sums_per_blk; 38351215Sbostic npages = nseg_pages + (sp->ninodes + INOPB(fs) - 1) / INOPB(fs); 38451215Sbostic sp->sum_addr = fs->lfs_sboffs[0] + 38551215Sbostic (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize) 38651215Sbostic - fsbtodb(fs, npages); 38751215Sbostic sp->sbp = lfs_newbuf(fs, sp->sum_addr, fs->lfs_bsize); 388*51301Sbostic if (sp->sum_num != 0) 389*51301Sbostic sp->sbp->b_flags |= B_CALL; 39051215Sbostic sp->bpp[fs->lfs_ssize - npages] = sp->sbp; 39151215Sbostic printf("Inserting summary block, address %x at index %d\n", 39251215Sbostic sp->sbp->b_lblkno, fs->lfs_ssize - npages); 39351215Sbostic sp->seg_bytes_left -= fs->lfs_bsize; 394*51301Sbostic sp->segsum = 395*51301Sbostic sp->sbp->b_un.b_addr + fs->lfs_bsize - LFS_SUMMARY_SIZE; 39651215Sbostic sp->sum_addr += (fs->lfs_bsize - LFS_SUMMARY_SIZE) / DEV_BSIZE; 39751188Sbostic } else { 39851215Sbostic sp->segsum -= LFS_SUMMARY_SIZE; 39951215Sbostic sp->sum_addr -= LFS_SUMMARY_SIZE / DEV_BSIZE; 40051188Sbostic } 40151188Sbostic 40251215Sbostic ssp = sp->segsum; 403*51301Sbostic ssp->ss_next = fs->lfs_nextseg; 40451215Sbostic ssp->ss_prev = fs->lfs_lastseg; 40551215Sbostic 40651188Sbostic /* Initialize segment summary info. */ 40751188Sbostic sp->fip = (FINFO *)(sp->segsum + sizeof(SEGSUM)); 40851215Sbostic sp->fip->fi_nblocks = 0; 40951188Sbostic ssp->ss_nextsum = (daddr_t)-1; 41051188Sbostic ssp->ss_create = time.tv_sec; 41151188Sbostic 41251188Sbostic ssp->ss_nfinfo = 0; 41351188Sbostic ssp->ss_ninos = 0; 41451188Sbostic sp->sum_bytes_left -= LFS_SUMMARY_SIZE; 41551188Sbostic sp->seg_bytes_left -= LFS_SUMMARY_SIZE; 41651188Sbostic } 41751188Sbostic 41851188Sbostic #define seginc(fs, sn) ((sn + 1) % fs->lfs_nseg) 41951188Sbostic static daddr_t 42051188Sbostic lfs_nextseg(fs) 42151188Sbostic LFS *fs; 42251188Sbostic { 42351188Sbostic int segnum, sn; 42451188Sbostic SEGUSE *sup; 42551188Sbostic 42651188Sbostic segnum = satosn(fs, fs->lfs_nextseg); 42751215Sbostic for (sn = seginc(fs, segnum); sn != segnum; sn = seginc(fs, sn)) 42851215Sbostic if (!(fs->lfs_segtab[sn].su_flags & SEGUSE_DIRTY)) 42951188Sbostic break; 43051215Sbostic 43151188Sbostic if (sn == segnum) 43251188Sbostic panic("lfs_nextseg: file system full"); /* XXX */ 43351188Sbostic return(sntosa(fs, sn)); 43451188Sbostic } 43551188Sbostic 43651188Sbostic /* 43751188Sbostic * Update the metadata that points to the blocks listed in the FIP 43851188Sbostic * array. 43951188Sbostic */ 44051215Sbostic static void 44151215Sbostic lfs_updatemeta(fs, sp, ip, lbp, bpp, nblocks) 44251188Sbostic LFS *fs; 44351215Sbostic SEGMENT *sp; 44451188Sbostic INODE *ip; 44551215Sbostic daddr_t *lbp; 44651188Sbostic BUF **bpp; 44751215Sbostic int nblocks; 44851188Sbostic { 44951188Sbostic SEGUSE *segup; 45051215Sbostic BUF **lbpp, *bp, *mbp; 45151188Sbostic daddr_t da, iblkno; 45251215Sbostic int db_per_fsb, error, i, oldsegnum; 45351215Sbostic long lbn; 45451188Sbostic 45551215Sbostic printf("lfs_updatemeta of %d blocks\n", nblocks); 456*51301Sbostic if (nblocks == 0 && (ip->i_flag & (IMOD | IACC | IUPD | ICHG)) == 0) 45751215Sbostic return; 45851215Sbostic 45951215Sbostic /* First sort the blocks and add disk addresses */ 46051215Sbostic shellsort(bpp, lbp, nblocks); 46151215Sbostic 46251215Sbostic db_per_fsb = 1 << fs->lfs_fsbtodb; 46351215Sbostic for (lbpp = bpp, i = 0; i < nblocks; i++, lbpp++) { 46451215Sbostic (*lbpp)->b_blkno = sp->saddr; 46551215Sbostic sp->saddr += db_per_fsb; 46651215Sbostic } 46751215Sbostic 46851215Sbostic for (lbpp = bpp, i = 0; i < nblocks; i++, lbpp++) { 46951215Sbostic lbn = lbp[i]; 47051215Sbostic printf("lfs_updatemeta: block %d\n", lbn); 47151188Sbostic if (error = lfs_bmap(ip, lbn, &da)) 47251215Sbostic panic("lfs_updatemeta: lfs_bmap returned error"); 47351188Sbostic 47451188Sbostic if (da) { 47551215Sbostic /* Update segment usage information */ 47651188Sbostic oldsegnum = (da - fs->lfs_sboffs[0]) / 47751188Sbostic fsbtodb(fs, fs->lfs_ssize); 47851188Sbostic segup = fs->lfs_segtab+oldsegnum; 47951188Sbostic segup->su_lastmod = time.tv_sec; 48051188Sbostic if ((segup->su_nbytes -= fs->lfs_bsize) < 0) 48151188Sbostic printf("lfs_updatemeta: negative bytes %s %d\n", 48251188Sbostic "in segment", oldsegnum); 48351188Sbostic } 48451188Sbostic 48551215Sbostic /* 48651215Sbostic * Now change whoever points to lbn. We could start with the 48751215Sbostic * smallest (most negative) block number in these if clauses, 48851215Sbostic * but we assume that indirect blocks are least common, and 48951215Sbostic * handle them separately. 49051215Sbostic */ 49151215Sbostic bp = NULL; 49251215Sbostic if (lbn < 0) { 49351215Sbostic if (lbn < -NIADDR) { 49451215Sbostic printf("lfs_updatemeta: changing indirect block %d\n", D_INDIR); 49551215Sbostic if (error = bread(ITOV(ip), D_INDIR, 49651215Sbostic fs->lfs_bsize, NOCRED, &bp)) 49751215Sbostic panic("lfs_updatemeta: error on bread"); 49851215Sbostic 49951215Sbostic bp->b_un.b_daddr[-lbn % NINDIR(fs)] = 50051215Sbostic (*lbpp)->b_blkno; 50151215Sbostic } else 50251215Sbostic ip->i_din.di_ib[-lbn-1] = (*lbpp)->b_blkno; 50351215Sbostic 50451215Sbostic } else if (lbn < NDADDR) 50551188Sbostic ip->i_din.di_db[lbn] = (*lbpp)->b_blkno; 50651188Sbostic else if ((lbn -= NDADDR) < NINDIR(fs)) { 50751188Sbostic printf("lfs_updatemeta: changing indirect block %d\n", S_INDIR); 50851215Sbostic if (error = bread(ITOV(ip), S_INDIR, fs->lfs_bsize, 50951215Sbostic NOCRED, &bp)) 51051215Sbostic panic("lfs_updatemeta: bread returned error"); 51151215Sbostic 51251188Sbostic bp->b_un.b_daddr[lbn] = (*lbpp)->b_blkno; 51351188Sbostic } else if ( (lbn = (lbn - NINDIR(fs)) / NINDIR(fs)) < 51451188Sbostic NINDIR(fs)) { 51551188Sbostic 51651188Sbostic iblkno = - (lbn + NIADDR + 1); 51751188Sbostic printf("lfs_updatemeta: changing indirect block %d\n", iblkno); 51851215Sbostic if (error = bread(ITOV(ip), iblkno, fs->lfs_bsize, 51951215Sbostic NOCRED, &bp)) 52051215Sbostic panic("lfs_updatemeta: bread returned error"); 52151215Sbostic 52251188Sbostic bp->b_un.b_daddr[lbn % NINDIR(fs)] = (*lbpp)->b_blkno; 52351188Sbostic } 52451188Sbostic else 52551215Sbostic panic("lfs_updatemeta: logical block number too large"); 52651215Sbostic if (bp) 52751215Sbostic lfs_bwrite(bp); 52851188Sbostic } 52951188Sbostic } 53051188Sbostic 531*51301Sbostic static SEGMENT * 53251215Sbostic lfs_writeckp(fs, sp) 53351215Sbostic LFS *fs; 53451215Sbostic SEGMENT *sp; 53551215Sbostic { 53651215Sbostic BUF *bp; 53751215Sbostic FINFO *fip; 53851215Sbostic INODE *ip; 53951215Sbostic SEGUSE *sup; 540*51301Sbostic void *xp; 54151215Sbostic daddr_t *lbp; 54251215Sbostic int bytes_needed, i; 54351215Sbostic 54451215Sbostic printf("lfs_writeckp\n"); 54551215Sbostic /* 54651215Sbostic * This will write the dirty ifile blocks, but not the segusage 54751215Sbostic * table nor the ifile inode. 54851215Sbostic */ 54951215Sbostic sp = lfs_writefile(sp, fs, fs->lfs_ivnode, 1); 55051215Sbostic 55151215Sbostic /* 55251215Sbostic * Make sure that the segment usage table and the ifile inode will 55351215Sbostic * fit in this segment. If they won't, put them in the next segment 55451215Sbostic */ 55551215Sbostic bytes_needed = fs->lfs_segtabsz << fs->lfs_bshift; 55651215Sbostic if (sp->ninodes % INOPB(fs) == 0) 55751215Sbostic bytes_needed += fs->lfs_bsize; 55851215Sbostic 55951215Sbostic if (sp->seg_bytes_left < bytes_needed) { 56051215Sbostic lfs_writeseg(fs, sp); 56151215Sbostic sp = lfs_newseg(fs); 56251215Sbostic } else if (sp->sum_bytes_left < (fs->lfs_segtabsz * sizeof(daddr_t))) 56351215Sbostic lfs_newsum(fs, sp); 56451215Sbostic 56551215Sbostic #ifdef DEBUG 56651215Sbostic if (sp->seg_bytes_left < bytes_needed) 56751215Sbostic panic("lfs_writeckp: unable to write checkpoint"); 56851215Sbostic #endif 56951215Sbostic 57051215Sbostic /* 571*51301Sbostic * Update the segment usage information and the ifile inode 572*51301Sbostic * and write it out. 57351215Sbostic */ 57451215Sbostic sup = fs->lfs_segtab + sp->seg_number; 575*51301Sbostic sup->su_nbytes = 576*51301Sbostic (fs->lfs_segmask + 1) - sp->seg_bytes_left + bytes_needed; 57751215Sbostic sup->su_lastmod = time.tv_sec; 57851215Sbostic sup->su_flags = SEGUSE_DIRTY; 57951215Sbostic 580*51301Sbostic /* Get buffers for the segusage table and write it out. */ 58151215Sbostic ip = VTOI(fs->lfs_ivnode); 58251215Sbostic fip = sp->fip; 58351215Sbostic lbp = &fip->fi_blocks[fip->fi_nblocks]; 58451215Sbostic for (xp = fs->lfs_segtab, i = 0; i < fs->lfs_segtabsz; 58551215Sbostic i++, xp += fs->lfs_bsize, lbp++) { 586*51301Sbostic *sp->cbpp++ = bp = lfs_newbuf(fs, sp->saddr, fs->lfs_bsize); 587*51301Sbostic bp->b_flags |= B_CALL; 58851215Sbostic bcopy(xp, bp->b_un.b_words, fs->lfs_bsize); 58951215Sbostic ip->i_din.di_db[i] = sp->saddr; 59051215Sbostic sp->saddr += (1 << fs->lfs_fsbtodb); 59151215Sbostic *lbp = i; 59251215Sbostic fip->fi_nblocks++; 59351215Sbostic } 59451215Sbostic sp = lfs_writeinode(fs, sp, fs->lfs_ivnode); 59551215Sbostic lfs_writeseg(fs, sp); 596*51301Sbostic return (sp); 59751215Sbostic } 59851215Sbostic 59951188Sbostic /* 60051188Sbostic * XXX -- I think we need to figure out what to do if we write 60151188Sbostic * the segment and find more dirty blocks when we're done. 60251188Sbostic */ 60351188Sbostic static SEGMENT * 60451215Sbostic lfs_writefile(sp, fs, vp, do_ckp) 60551188Sbostic SEGMENT *sp; 60651188Sbostic LFS *fs; 60751188Sbostic VNODE *vp; 60851215Sbostic int do_ckp; 60951188Sbostic { 61051188Sbostic FINFO *fip; 611*51301Sbostic ino_t inum; 61251188Sbostic 613*51301Sbostic /* Initialize the FINFO structure. */ 614*51301Sbostic inum = VTOI(vp)->i_number; 615*51301Sbostic printf("lfs_writefile: node %d\n", inum); 616*51301Sbostic sp->fip->fi_ino = inum; 61751215Sbostic sp->fip->fi_nblocks = 0; 618*51301Sbostic sp->fip->fi_version = 619*51301Sbostic inum == LFS_IFILE_INUM ? 1 : lfs_getversion(fs, inum); 62051188Sbostic 62151215Sbostic sp = lfs_gather(fs, sp, vp, match_data); 62251215Sbostic if (do_ckp) { 62351215Sbostic sp = lfs_gather(fs, sp, vp, match_indir); 62451215Sbostic sp = lfs_gather(fs, sp, vp, match_dindir); 62551215Sbostic } 62651188Sbostic 62751215Sbostic (void)printf("lfs_writefile: adding %d blocks to segment\n", 62851215Sbostic sp->fip->fi_nblocks); 62951215Sbostic /* 630*51301Sbostic * Update the inode for this file and reflect new inode address in 631*51301Sbostic * the ifile. If this is the ifile, don't update the inode, because 632*51301Sbostic * we're checkpointing and will update the inode with the segment 633*51301Sbostic * usage information (so we musn't bump the finfo pointer either). 63451215Sbostic */ 635*51301Sbostic if (inum != LFS_IFILE_INUM) { 63651215Sbostic sp = lfs_writeinode(fs, sp, vp); 63751215Sbostic fip = sp->fip; 63851215Sbostic if (fip->fi_nblocks) { 63951188Sbostic ((SEGSUM *)(sp->segsum))->ss_nfinfo++; 64051215Sbostic sp->fip = (FINFO *)((u_long)fip + sizeof(FINFO) + 64151215Sbostic sizeof(u_long) * fip->fi_nblocks - 1); 64251188Sbostic } 64351188Sbostic } 64451188Sbostic return(sp); 64551188Sbostic } 64651188Sbostic 64751215Sbostic static SEGMENT * 64851215Sbostic lfs_writeinode(fs, sp, vp) 64951215Sbostic LFS *fs; 65051215Sbostic SEGMENT *sp; 65151215Sbostic VNODE *vp; 65251188Sbostic { 65351215Sbostic BUF *bp; 65451215Sbostic INODE *ip; 65551215Sbostic SEGSUM *ssp; 65651215Sbostic daddr_t iaddr, next_addr; 65751215Sbostic int npages, nseg_pages, sums_per_blk; 65851215Sbostic struct dinode *dip; 65951215Sbostic 66051215Sbostic printf("lfs_writeinode\n"); 66151215Sbostic sums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE; 66251215Sbostic if (sp->ibp == NULL) { 66351215Sbostic /* Allocate a new buffer. */ 66451215Sbostic if (sp->seg_bytes_left < fs->lfs_bsize) { 66551215Sbostic lfs_writeseg(fs, sp); 66651215Sbostic sp = lfs_newseg(fs); 66751215Sbostic } 66851215Sbostic nseg_pages = (sp->sum_num + sums_per_blk) / sums_per_blk; 66951215Sbostic npages = nseg_pages + (sp->ninodes + INOPB(fs)) / INOPB(fs); 67051215Sbostic next_addr = fs->lfs_sboffs[0] + 67151215Sbostic (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize) 67251215Sbostic - fsbtodb(fs, npages); 67351215Sbostic sp->ibp = lfs_newbuf(fs, next_addr, fs->lfs_bsize); 674*51301Sbostic sp->ibp->b_flags |= B_CALL; 67551215Sbostic sp->bpp[fs->lfs_ssize - npages] = sp->ibp; 67651215Sbostic sp->seg_bytes_left -= fs->lfs_bsize; 67751215Sbostic printf("alloc inode block @ daddr %x, bp = %x inserted at %d\n", 67851215Sbostic next_addr, sp->ibp, fs->lfs_ssize - npages); 67951215Sbostic } 68051215Sbostic ip = VTOI(vp); 68151215Sbostic bp = sp->ibp; 68251215Sbostic dip = bp->b_un.b_dino + (sp->ninodes % INOPB(fs)); 68351215Sbostic bcopy(&ip->i_din, dip, sizeof(struct dinode)); 68451215Sbostic iaddr = bp->b_blkno; 68551215Sbostic ++sp->ninodes; 68651215Sbostic ssp = sp->segsum; 68751215Sbostic ++ssp->ss_ninos; 68851215Sbostic if (sp->ninodes % INOPB(fs) == 0) 68951215Sbostic sp->ibp = NULL; 69051215Sbostic if (ip->i_number == LFS_IFILE_INUM) 69151215Sbostic fs->lfs_idaddr = iaddr; 69251215Sbostic else 69351215Sbostic lfs_iset(ip, iaddr, ip->i_atime); /* Update ifile */ 69451215Sbostic ip->i_flags &= ~(IMOD|IACC|IUPD|ICHG); /* make inode clean */ 69551215Sbostic return(sp); 69651188Sbostic } 69751188Sbostic 69851188Sbostic static void 69951188Sbostic lfs_writeseg(fs, sp) 70051188Sbostic LFS *fs; 70151188Sbostic SEGMENT *sp; 70251188Sbostic { 70351215Sbostic BUF **bpp; 70451188Sbostic SEGSUM *ssp; 70551188Sbostic SEGUSE *sup; 70651188Sbostic VNODE *devvp; 70751188Sbostic int nblocks, nbuffers, ninode_blocks, nsegsums, nsum_pb; 708*51301Sbostic int i, metaoff, nmeta, s; 70951188Sbostic 71051188Sbostic printf("lfs_writeseg\n"); 71151215Sbostic fs->lfs_lastseg = sntosa(fs, sp->seg_number); 71251215Sbostic lfs_endsum(fs, sp, 0); 71351188Sbostic 71451188Sbostic /* 71551188Sbostic * Copy inode and summary block buffer pointers down so they are 71651215Sbostic * contiguous with the page buffer pointers. 71751188Sbostic */ 71851215Sbostic ssp = sp->segsum; 71951215Sbostic nsum_pb = fs->lfs_bsize / LFS_SUMMARY_SIZE; 72051215Sbostic nbuffers = sp->cbpp - sp->bpp; 721*51301Sbostic /* 722*51301Sbostic * XXX 723*51301Sbostic * Why isn't this (sp->sum_num + nsum_pb - 1) / nsum_pb; 724*51301Sbostic */ 72551215Sbostic nsegsums = 1 + sp->sum_num / nsum_pb; 72651215Sbostic ninode_blocks = (sp->ninodes + INOPB(fs) - 1) / INOPB(fs); 72751215Sbostic nmeta = ninode_blocks + nsegsums; 72851188Sbostic metaoff = fs->lfs_ssize - nmeta; 72951215Sbostic nblocks = nbuffers + nmeta; 73051188Sbostic if (sp->bpp + metaoff != sp->cbpp) 73151215Sbostic bcopy(sp->bpp + metaoff, sp->cbpp, sizeof(BUF *) * nmeta); 73251215Sbostic sp->cbpp += nmeta; 73351188Sbostic 73451188Sbostic sup = fs->lfs_segtab + sp->seg_number; 73551188Sbostic sup->su_nbytes = nblocks << fs->lfs_bshift; 73651188Sbostic sup->su_lastmod = time.tv_sec; 73751188Sbostic sup->su_flags = SEGUSE_DIRTY; 73851188Sbostic 73951188Sbostic /* 74051215Sbostic * Since we need to guarantee that the summary block gets written last, 74151188Sbostic * we issue the writes in two sets. The first n-1 buffers first, and 742*51301Sbostic * then, after they've completed, the summary buffer. Only when that 74351215Sbostic * final write completes is the segment valid. 74451188Sbostic */ 74551188Sbostic devvp = VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp; 746*51301Sbostic --nblocks; /* Don't count summary. */ 747*51301Sbostic 74851188Sbostic sp->nextp = fs->lfs_seglist; 74951188Sbostic fs->lfs_seglist = sp; 75051215Sbostic 751*51301Sbostic s = splbio(); 752*51301Sbostic fs->lfs_iocount += nblocks; 753*51301Sbostic splx(s); 754*51301Sbostic for (bpp = sp->bpp, i = 0; i < nblocks; ++i, ++bpp) 755*51301Sbostic (devvp->v_op->vop_strategy)(*bpp); 75651188Sbostic } 75751188Sbostic 75851215Sbostic static void 759*51301Sbostic lfs_writesum(fs) 760*51301Sbostic LFS *fs; 761*51301Sbostic { 762*51301Sbostic BUF *bp; 763*51301Sbostic SEGMENT *next_sp, *sp; 764*51301Sbostic VNODE *devvp; 765*51301Sbostic 766*51301Sbostic printf("lfs_writesum\n"); 767*51301Sbostic devvp = NULL; 768*51301Sbostic for (sp = fs->lfs_seglist; sp; sp = next_sp) { 769*51301Sbostic bp = *(sp->cbpp - 1); 770*51301Sbostic if (devvp == NULL) 771*51301Sbostic devvp = VFSTOUFS(bp->b_vp->v_mount)->um_devvp; 772*51301Sbostic (devvp->v_op->vop_strategy)(bp); 773*51301Sbostic biowait(bp); 774*51301Sbostic next_sp = sp->nextp; 775*51301Sbostic free(sp->bpp, M_SEGMENT); 776*51301Sbostic free(sp, M_SEGMENT); 777*51301Sbostic } 778*51301Sbostic } 779*51301Sbostic 780*51301Sbostic static void 78151215Sbostic lfs_writesuper(fs, sp) 78251215Sbostic LFS *fs; 78351215Sbostic SEGMENT *sp; 78451215Sbostic { 78551215Sbostic BUF *bp; 78651215Sbostic VNODE *devvp; 78751215Sbostic 78851215Sbostic printf("lfs_writesuper\n"); 78951215Sbostic /* Get a buffer for the super block */ 79051215Sbostic fs->lfs_cksum = cksum(fs, sizeof(LFS) - sizeof(fs->lfs_cksum)); 79151215Sbostic bp = lfs_newbuf(fs, fs->lfs_sboffs[0], LFS_SBPAD); 79251215Sbostic bcopy(fs, bp->b_un.b_lfs, sizeof(LFS)); 79351215Sbostic 79451215Sbostic /* Write the first superblock; wait. */ 79551215Sbostic devvp = VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp; 796*51301Sbostic (devvp->v_op->vop_strategy)(bp); 79751215Sbostic biowait(bp); 79851215Sbostic 79951215Sbostic /* Now, write the second one for which we don't have to wait */ 80051215Sbostic bp->b_flags &= ~B_DONE; 80151215Sbostic bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1]; 802*51301Sbostic (devvp->v_op->vop_strategy)(bp); 803*51301Sbostic 804*51301Sbostic bp->b_vp = NULL; /* No associated vnode. */ 80551215Sbostic brelse(bp); 80651215Sbostic } 80751215Sbostic 80851215Sbostic /* Block match routines used when traversing the dirty block chain. */ 80951215Sbostic match_data(bp) 81051215Sbostic BUF *bp; 81151215Sbostic { 81251215Sbostic return(bp->b_lblkno >= 0); 81351215Sbostic } 81451215Sbostic 81551215Sbostic match_dindir(bp) 81651215Sbostic BUF *bp; 81751215Sbostic { 81851215Sbostic return(bp->b_lblkno == D_INDIR); 81951215Sbostic } 82051215Sbostic 82151188Sbostic /* 82251215Sbostic * These are single indirect blocks. There are three types: 82351215Sbostic * the one in the inode (address S_INDIR = -1). 82451215Sbostic * the ones that hang off of D_INDIR the double indirect in the inode. 82551215Sbostic * these all have addresses in the range -2NINDIR to -(3NINDIR-1) 82651215Sbostic * the ones that hang off of double indirect that hang off of the 82751215Sbostic * triple indirect. These all have addresses < -(NINDIR^2). 82851215Sbostic * Since we currently don't support, triple indirect blocks, this gets simpler. 82951215Sbostic * We just need to look for block numbers less than -NIADDR. 83051215Sbostic */ 83151215Sbostic match_indir(bp) 83251215Sbostic BUF *bp; 83351215Sbostic { 83451215Sbostic return(bp->b_lblkno == S_INDIR || bp->b_lblkno < -NIADDR); 83551215Sbostic } 83651215Sbostic 83751215Sbostic /* 83851188Sbostic * Shellsort (diminishing increment sort) from Data Structures and 83951188Sbostic * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290; 84051188Sbostic * see also Knuth Vol. 3, page 84. The increments are selected from 84151188Sbostic * formula (8), page 95. Roughly O(N^3/2). 84251188Sbostic */ 84351188Sbostic /* 84451188Sbostic * This is our own private copy of shellsort because we want to sort 84551188Sbostic * two parallel arrays (the array of buffer pointers and the array of 84651188Sbostic * logical block numbers) simultaneously. Note that we cast the array 84751188Sbostic * of logical block numbers to a unsigned in this routine so that the 84851188Sbostic * negative block numbers (meta data blocks) sort AFTER the data blocks. 84951188Sbostic */ 85051188Sbostic static void 85151188Sbostic shellsort(bp_array, lb_array, nmemb) 85251188Sbostic BUF **bp_array; 85351215Sbostic daddr_t *lb_array; 85451188Sbostic register int nmemb; 85551188Sbostic { 85651188Sbostic static int __rsshell_increments[] = { 4, 1, 0 }; 85751188Sbostic register int incr, *incrp, t1, t2; 85851188Sbostic BUF *bp_temp; 85951188Sbostic u_long lb_temp; 86051188Sbostic 86151188Sbostic for (incrp = __rsshell_increments; incr = *incrp++;) 86251188Sbostic for (t1 = incr; t1 < nmemb; ++t1) 86351188Sbostic for (t2 = t1 - incr; t2 >= 0;) 86451188Sbostic if (lb_array[t2] > lb_array[t2 + incr]) { 86551188Sbostic lb_temp = lb_array[t2]; 86651188Sbostic lb_array[t2] = lb_array[t2 + incr]; 86751188Sbostic lb_array[t2 + incr] = lb_temp; 86851188Sbostic bp_temp = bp_array[t2]; 86951188Sbostic bp_array[t2] = bp_array[t2 + incr]; 87051188Sbostic bp_array[t2 + incr] = bp_temp; 87151188Sbostic t2 -= incr; 87251188Sbostic } else 87351188Sbostic break; 87451188Sbostic } 87551215Sbostic #endif /* LOGFS */ 876