151188Sbostic /* 251188Sbostic * Copyright (c) 1991 Regents of the University of California. 351188Sbostic * All rights reserved. 451188Sbostic * 551188Sbostic * %sccs.include.redist.c% 651188Sbostic * 7*52688Sbostic * @(#)lfs_segment.c 7.13 (Berkeley) 02/28/92 851188Sbostic */ 951188Sbostic 1051490Sbostic #include <sys/param.h> 1151490Sbostic #include <sys/systm.h> 1251490Sbostic #include <sys/namei.h> 1352085Sbostic #include <sys/kernel.h> 1451490Sbostic #include <sys/resourcevar.h> 1551490Sbostic #include <sys/file.h> 1651490Sbostic #include <sys/stat.h> 1751490Sbostic #include <sys/buf.h> 1851490Sbostic #include <sys/proc.h> 1951490Sbostic #include <sys/conf.h> 2051490Sbostic #include <sys/vnode.h> 2151490Sbostic #include <sys/specdev.h> 2251490Sbostic #include <sys/fifo.h> 2351490Sbostic #include <sys/malloc.h> 2451490Sbostic #include <sys/mount.h> 2551188Sbostic 2651499Sbostic #include <ufs/ufs/quota.h> 2751499Sbostic #include <ufs/ufs/inode.h> 2851499Sbostic #include <ufs/ufs/dir.h> 2951499Sbostic #include <ufs/ufs/ufsmount.h> 3051490Sbostic 3151499Sbostic #include <ufs/lfs/lfs.h> 3251499Sbostic #include <ufs/lfs/lfs_extern.h> 3351490Sbostic 3451860Sbostic /* In-memory description of a segment about to be written. */ 3551860Sbostic struct segment { 3652085Sbostic struct buf **bpp; /* pointer to buffer array */ 3752085Sbostic struct buf **cbpp; /* pointer to next available bp */ 3852085Sbostic struct buf *ibp; /* buffer pointer to inode page */ 3952085Sbostic struct finfo *fip; /* current fileinfo pointer */ 4051860Sbostic void *segsum; /* segment summary info */ 4151860Sbostic u_long ninodes; /* number of inodes in this segment */ 4251860Sbostic u_long seg_bytes_left; /* bytes left in segment */ 4351860Sbostic u_long sum_bytes_left; /* bytes left in summary block */ 4451860Sbostic u_long seg_number; /* number of this segment */ 4551860Sbostic #define SEGM_CKP 0x01 /* doing a checkpoint */ 4651860Sbostic u_long seg_flags; /* run-time flags for this segment */ 4751860Sbostic }; 4851860Sbostic 4951188Sbostic /* 5051860Sbostic * Determine if it's OK to start a partial in this segment, or if we need 5151860Sbostic * to go on to a new segment. 5251301Sbostic */ 5351860Sbostic #define LFS_PARTIAL_FITS(fs) \ 5451860Sbostic ((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \ 5551860Sbostic 1 << (fs)->lfs_fsbtodb) 5651188Sbostic 5752085Sbostic int lfs_callback __P((struct buf *)); 5852085Sbostic void lfs_gather __P((struct lfs *, struct segment *, 5952085Sbostic struct vnode *, int (*) __P((struct lfs *, struct buf *)))); 6052085Sbostic void lfs_initseg __P((struct lfs *, struct segment *)); 6152085Sbostic void lfs_iset __P((struct inode *, daddr_t, time_t)); 6252085Sbostic int lfs_match_data __P((struct lfs *, struct buf *)); 6352085Sbostic int lfs_match_dindir __P((struct lfs *, struct buf *)); 6452085Sbostic int lfs_match_indir __P((struct lfs *, struct buf *)); 6552085Sbostic int lfs_match_tindir __P((struct lfs *, struct buf *)); 6652085Sbostic struct buf * 67*52688Sbostic lfs_newbuf __P((struct lfs *, daddr_t, size_t)); 6852077Sbostic void lfs_newseg __P((struct lfs *)); 6952085Sbostic void lfs_shellsort __P((struct buf **, daddr_t *, register int)); 7052077Sbostic void lfs_updatemeta __P((struct lfs *, 7152085Sbostic struct segment *, struct vnode *, daddr_t *, struct buf **, int)); 7252085Sbostic void lfs_writefile __P((struct lfs *, struct segment *, struct vnode *)); 7352085Sbostic void lfs_writeinode __P((struct lfs *, struct segment *, struct inode *)); 7452085Sbostic void lfs_writeseg __P((struct lfs *, struct segment *)); 7552085Sbostic void lfs_writesuper __P((struct lfs *, struct segment *)); 7651188Sbostic 7751860Sbostic int lfs_allclean_wakeup; /* Cleaner wakeup address. */ 7851860Sbostic 7952328Sbostic /* 8052328Sbostic * Ifile and meta data blocks are not marked busy, so segment writes MUST be 8152328Sbostic * single threaded. Currently, there are two paths into lfs_segwrite, sync() 8252328Sbostic * and getnewbuf(). They both mark the file system busy. Lfs_vflush() 8352328Sbostic * explicitly marks the file system busy. So lfs_segwrite is safe. I think. 8452328Sbostic */ 8552328Sbostic 8651188Sbostic int 8752328Sbostic lfs_vflush(vp) 8852328Sbostic struct vnode *vp; 8952328Sbostic { 9052328Sbostic struct inode *ip; 9152328Sbostic struct lfs *fs; 9252328Sbostic struct mount *mp; 9352328Sbostic struct segment *sp; 9452328Sbostic int error, s; 9552328Sbostic 9652328Sbostic #ifdef VERBOSE 9752328Sbostic printf("lfs_vflush\n"); 9852328Sbostic #endif 9952328Sbostic mp = vp->v_mount; 10052328Sbostic fs = VFSTOUFS(mp)->um_lfs; 10152328Sbostic 10252328Sbostic /* 10352328Sbostic * XXX 10452328Sbostic * check flags? 10552328Sbostic * mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY) || 10652328Sbostic */ 10752328Sbostic if (vfs_busy(mp)) 10852328Sbostic return (0); 10952328Sbostic 11052328Sbostic /* 11152328Sbostic * Allocate a segment structure and enough space to hold pointers to 11252328Sbostic * the maximum possible number of buffers which can be described in a 11352328Sbostic * single summary block. 11452328Sbostic */ 11552328Sbostic sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 11652328Sbostic sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 11752328Sbostic sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 11852328Sbostic sp->seg_flags = SEGM_CKP; 11952328Sbostic lfs_initseg(fs, sp); 12052328Sbostic 12152328Sbostic /* 12252328Sbostic * Keep a cumulative count of the outstanding I/O operations. If the 12352328Sbostic * disk drive catches up with us it could go to zero before we finish, 12452328Sbostic * so we artificially increment it by one until we've scheduled all of 12552328Sbostic * the writes we intend to do. 12652328Sbostic */ 12752328Sbostic s = splbio(); 128*52688Sbostic ++fs->lfs_iocount; 12952328Sbostic splx(s); 13052328Sbostic 13152328Sbostic if (vp->v_dirtyblkhd != NULL) 13252328Sbostic lfs_writefile(fs, sp, vp); 13352328Sbostic ip = VTOI(vp); 13452328Sbostic lfs_writeinode(fs, sp, ip); 13552328Sbostic ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 13652328Sbostic 13752328Sbostic lfs_writeseg(fs, sp); 13852328Sbostic 13952328Sbostic /* 14052328Sbostic * If the I/O count is non-zero, sleep until it reaches zero. At the 14152328Sbostic * moment, the user's process hangs around so we can sleep. 14252328Sbostic */ 14352328Sbostic s = splbio(); 14452328Sbostic if (--fs->lfs_iocount && (error = 145*52688Sbostic tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0))) 14652328Sbostic return (error); 14752328Sbostic splx(s); 14852328Sbostic vfs_unbusy(mp); 14952328Sbostic 15052328Sbostic free(sp->bpp, M_SEGMENT); 15152328Sbostic free(sp, M_SEGMENT); 15252328Sbostic 15352328Sbostic return (0); 15452328Sbostic } 15552328Sbostic 15652328Sbostic int 15751215Sbostic lfs_segwrite(mp, do_ckp) 15852085Sbostic struct mount *mp; 15951860Sbostic int do_ckp; /* Do a checkpoint. */ 16051188Sbostic { 16152085Sbostic struct inode *ip; 16251499Sbostic struct lfs *fs; 16352085Sbostic struct segment *sp; 16452085Sbostic struct vnode *vp; 16552328Sbostic int error, islocked, s; 16651188Sbostic 16751860Sbostic #ifdef VERBOSE 16851860Sbostic printf("lfs_segwrite\n"); 16951860Sbostic #endif 17052328Sbostic fs = VFSTOUFS(mp)->um_lfs; 17152085Sbostic 17251860Sbostic /* 17352328Sbostic * Allocate a segment structure and enough space to hold pointers to 17452328Sbostic * the maximum possible number of buffers which can be described in a 17552328Sbostic * single summary block. 17652328Sbostic */ 17752328Sbostic sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 17852328Sbostic sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 17952328Sbostic sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 18052328Sbostic sp->seg_flags = do_ckp ? SEGM_CKP : 0; 18152328Sbostic lfs_initseg(fs, sp); 18252328Sbostic 18352328Sbostic /* 184*52688Sbostic * Keep a cumulative count of the outstanding I/O operations. If the 185*52688Sbostic * disk drive catches up with us it could go to zero before we finish, 186*52688Sbostic * so we artificially increment it by one until we've scheduled all of 187*52688Sbostic * the writes we intend to do. If not a checkpoint, we never do the 188*52688Sbostic * final decrement, avoiding the wakeup in the callback routine. 18951860Sbostic */ 190*52688Sbostic s = splbio(); 191*52688Sbostic ++fs->lfs_iocount; 192*52688Sbostic splx(s); 19351342Sbostic 19452328Sbostic loop: for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { 19551188Sbostic /* 19651188Sbostic * If the vnode that we are about to sync is no longer 19751188Sbostic * associated with this mount point, start over. 19851188Sbostic */ 19951188Sbostic if (vp->v_mount != mp) 20051188Sbostic goto loop; 20151860Sbostic 20252328Sbostic islocked = VOP_ISLOCKED(vp); 20351860Sbostic 20452077Sbostic /* 20552077Sbostic * XXX 20652077Sbostic * This is wrong, I think -- we should just wait until we 20752077Sbostic * get the vnode and go on. Probably going to reschedule 20852077Sbostic * all of the writes we already scheduled... 20952077Sbostic */ 21052328Sbostic if (islocked) 21152328Sbostic VREF(vp); 21252328Sbostic else if (vget(vp)) 21352085Sbostic { 21452085Sbostic printf("lfs_segment: failed to get vnode (tell Keith)!\n"); 21551188Sbostic goto loop; 21652085Sbostic } 21752328Sbostic /* 21852328Sbostic * Write the inode/file if dirty and it's not the 21952328Sbostic * the IFILE. 22052328Sbostic */ 22152328Sbostic ip = VTOI(vp); 22252328Sbostic if ((ip->i_flag & (IMOD | IACC | IUPD | ICHG) || 22352328Sbostic vp->v_dirtyblkhd != NULL) && 22452328Sbostic ip->i_number != LFS_IFILE_INUM) { 22552328Sbostic if (vp->v_dirtyblkhd != NULL) 22652328Sbostic lfs_writefile(fs, sp, vp); 22752328Sbostic lfs_writeinode(fs, sp, ip); 22852328Sbostic ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 22952328Sbostic } 23052328Sbostic if (islocked) 23152328Sbostic vrele(vp); 23252328Sbostic else 23352328Sbostic vput(vp); 23451188Sbostic } 23551860Sbostic if (do_ckp) { 23652077Sbostic vp = fs->lfs_ivnode; 23752077Sbostic while (vget(vp)); 23852077Sbostic ip = VTOI(vp); 23952077Sbostic if (vp->v_dirtyblkhd != NULL) 24052077Sbostic lfs_writefile(fs, sp, vp); 24152077Sbostic lfs_writeinode(fs, sp, ip); 24252077Sbostic ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 24352077Sbostic vput(vp); 24451860Sbostic } 24551342Sbostic lfs_writeseg(fs, sp); 24651342Sbostic 24751215Sbostic /* 24851860Sbostic * If the I/O count is non-zero, sleep until it reaches zero. At the 24951860Sbostic * moment, the user's process hangs around so we can sleep. 25051215Sbostic */ 251*52688Sbostic s = splbio(); 252*52688Sbostic --fs->lfs_iocount; 25351860Sbostic if (do_ckp) { 254*52688Sbostic if (fs->lfs_iocount && (error = 255*52688Sbostic tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs sync", 0))) 25651915Sbostic return (error); 25751860Sbostic splx(s); 25851860Sbostic lfs_writesuper(fs, sp); 259*52688Sbostic } else 260*52688Sbostic splx(s); 26151215Sbostic 26251927Sbostic free(sp->bpp, M_SEGMENT); 26351927Sbostic free(sp, M_SEGMENT); 26451215Sbostic 26551860Sbostic return (0); 26651188Sbostic } 26751188Sbostic 26851860Sbostic /* 26951860Sbostic * Write the dirty blocks associated with a vnode. 27051860Sbostic */ 27152077Sbostic void 27251860Sbostic lfs_writefile(fs, sp, vp) 27351499Sbostic struct lfs *fs; 27452085Sbostic struct segment *sp; 27552085Sbostic struct vnode *vp; 27651188Sbostic { 27751860Sbostic struct buf *bp; 27852085Sbostic struct finfo *fip; 27951860Sbostic IFILE *ifp; 28051188Sbostic 28151860Sbostic #ifdef VERBOSE 28251860Sbostic printf("lfs_writefile\n"); 28351860Sbostic #endif 28452085Sbostic if (sp->seg_bytes_left < fs->lfs_bsize || 28552085Sbostic sp->sum_bytes_left < sizeof(struct finfo)) { 28652085Sbostic lfs_writeseg(fs, sp); 28752085Sbostic lfs_initseg(fs, sp); 28852085Sbostic } 28952085Sbostic sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t); 29051215Sbostic 29152085Sbostic fip = sp->fip; 29252085Sbostic fip->fi_nblocks = 0; 29352085Sbostic fip->fi_ino = VTOI(vp)->i_number; 29452085Sbostic LFS_IENTRY(ifp, fs, fip->fi_ino, bp); 29552085Sbostic fip->fi_version = ifp->if_version; 29652085Sbostic brelse(bp); 29751188Sbostic 29852085Sbostic /* 29952085Sbostic * It may not be necessary to write the meta-data blocks at this point, 30052085Sbostic * as the roll-forward recovery code should be able to reconstruct the 30152085Sbostic * list. 30252085Sbostic */ 30352085Sbostic lfs_gather(fs, sp, vp, lfs_match_data); 30452085Sbostic lfs_gather(fs, sp, vp, lfs_match_indir); 30552085Sbostic lfs_gather(fs, sp, vp, lfs_match_dindir); 30651860Sbostic #ifdef TRIPLE 30752085Sbostic lfs_gather(fs, sp, vp, lfs_match_tindir); 30851860Sbostic #endif 30951342Sbostic 31052085Sbostic fip = sp->fip; 31151860Sbostic #ifdef META 31252085Sbostic printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks); 31351860Sbostic #endif 31452085Sbostic if (fip->fi_nblocks != 0) { 31552085Sbostic ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 31652085Sbostic sp->fip = 31752085Sbostic (struct finfo *)((caddr_t)fip + sizeof(struct finfo) + 31852085Sbostic sizeof(daddr_t) * (fip->fi_nblocks - 1)); 31952682Sstaelin } else 32052682Sstaelin sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t); 32151215Sbostic } 32251215Sbostic 32352077Sbostic void 32451915Sbostic lfs_writeinode(fs, sp, ip) 32551915Sbostic struct lfs *fs; 32652085Sbostic struct segment *sp; 32752085Sbostic struct inode *ip; 32851915Sbostic { 32952085Sbostic struct buf *bp, *ibp; 33052077Sbostic IFILE *ifp; 33152682Sstaelin SEGUSE *sup; 33252682Sstaelin daddr_t daddr; 33352077Sbostic ino_t ino; 33451915Sbostic int ndx; 33551915Sbostic 33651915Sbostic #ifdef VERBOSE 33751915Sbostic printf("lfs_writeinode\n"); 33851915Sbostic #endif 33951915Sbostic /* Allocate a new inode block if necessary. */ 34051915Sbostic if (sp->ibp == NULL) { 34151915Sbostic /* Allocate a new segment if necessary. */ 34251915Sbostic if (sp->seg_bytes_left < fs->lfs_bsize || 34351915Sbostic sp->sum_bytes_left < sizeof(daddr_t)) { 34451915Sbostic lfs_writeseg(fs, sp); 34551915Sbostic lfs_initseg(fs, sp); 34651915Sbostic } 34751915Sbostic 34851915Sbostic /* Get next inode block. */ 34952682Sstaelin daddr = fs->lfs_offset; 35051915Sbostic fs->lfs_offset += fsbtodb(fs, 1); 35151915Sbostic sp->ibp = *sp->cbpp++ = 352*52688Sbostic lfs_newbuf(fs, daddr, fs->lfs_bsize); 35351915Sbostic 354*52688Sbostic /* Set remaining space counters. */ 35551915Sbostic sp->seg_bytes_left -= fs->lfs_bsize; 35651915Sbostic sp->sum_bytes_left -= sizeof(daddr_t); 35752077Sbostic ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) - 35851915Sbostic sp->ninodes / INOPB(fs) - 1; 35952682Sstaelin ((daddr_t *)(sp->segsum))[ndx] = daddr; 36051915Sbostic } 36151915Sbostic 36252085Sbostic /* Update the inode times and copy the inode onto the inode page. */ 36352077Sbostic ITIMES(ip, &time, &time); 36451915Sbostic bp = sp->ibp; 36552085Sbostic bp->b_un.b_dino[sp->ninodes % INOPB(fs)] = ip->i_din; 36651915Sbostic 36751915Sbostic /* Increment inode count in segment summary block. */ 36851915Sbostic ++((SEGSUM *)(sp->segsum))->ss_ninos; 36951915Sbostic 37051915Sbostic /* If this page is full, set flag to allocate a new page. */ 37151915Sbostic if (++sp->ninodes % INOPB(fs) == 0) 37251915Sbostic sp->ibp = NULL; 37351915Sbostic 37451915Sbostic /* 37552077Sbostic * If updating the ifile, update the super-block. Update the disk 37652077Sbostic * address and access times for this inode in the ifile. 37751915Sbostic */ 37852077Sbostic ino = ip->i_number; 37952077Sbostic if (ino == LFS_IFILE_INUM) 38051915Sbostic fs->lfs_idaddr = bp->b_blkno; 38152077Sbostic 38252077Sbostic LFS_IENTRY(ifp, fs, ino, ibp); 38352682Sstaelin daddr = ifp->if_daddr; 38452077Sbostic ifp->if_daddr = bp->b_blkno; 38552085Sbostic LFS_UBWRITE(ibp); 38652682Sstaelin 38752682Sstaelin if (daddr != LFS_UNUSED_DADDR) { 38852682Sstaelin LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 38952682Sstaelin #ifdef DIAGNOSTIC 39052682Sstaelin if (sup->su_nbytes < sizeof(struct dinode)) 39152682Sstaelin panic("lfs: negative bytes (segment %d)\n", 39252682Sstaelin datosn(fs, daddr)); 39352682Sstaelin #endif 39452682Sstaelin sup->su_nbytes -= sizeof(struct dinode); 39552682Sstaelin LFS_UBWRITE(bp); 39652682Sstaelin } 39751915Sbostic } 39851915Sbostic 39952077Sbostic void 40051215Sbostic lfs_gather(fs, sp, vp, match) 40151499Sbostic struct lfs *fs; 40252085Sbostic struct segment *sp; 40352085Sbostic struct vnode *vp; 40452085Sbostic int (*match) __P((struct lfs *, struct buf *)); 40551215Sbostic { 40652085Sbostic struct buf **bpp, *bp, *nbp; 40752085Sbostic struct finfo *fip; 40852085Sbostic struct inode *ip; 40951215Sbostic daddr_t *lbp, *start_lbp; 41051342Sbostic u_long version; 41151342Sbostic int s; 41251215Sbostic 41351860Sbostic #ifdef VERBOSE 41451860Sbostic printf("lfs_gather\n"); 41551860Sbostic #endif 41651215Sbostic ip = VTOI(vp); 41751215Sbostic bpp = sp->cbpp; 41851215Sbostic fip = sp->fip; 41951215Sbostic start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks]; 42051215Sbostic 42151215Sbostic s = splbio(); 42251215Sbostic for (bp = vp->v_dirtyblkhd; bp; bp = nbp) { 42351215Sbostic nbp = bp->b_blockf; 42451915Sbostic /* 42551915Sbostic * XXX 42652682Sstaelin * Should sleep on any BUSY buffer if doing an fsync? 42751915Sbostic */ 42852328Sbostic if (bp->b_flags & B_BUSY || !match(fs, bp)) 42951215Sbostic continue; 43051342Sbostic #ifdef DIAGNOSTIC 43151860Sbostic if (!(bp->b_flags & B_DELWRI)) 43251915Sbostic panic("lfs_gather: bp not B_DELWRI"); 43351860Sbostic if (!(bp->b_flags & B_LOCKED)) 43451915Sbostic panic("lfs_gather: bp not B_LOCKED"); 43551342Sbostic #endif 43651860Sbostic /* 43751860Sbostic * If full, finish this segment. We may be doing I/O, so 43851860Sbostic * release and reacquire the splbio(). 43951860Sbostic */ 44051342Sbostic if (sp->sum_bytes_left < sizeof(daddr_t) || 44151215Sbostic sp->seg_bytes_left < fs->lfs_bsize) { 44251215Sbostic splx(s); 44351342Sbostic lfs_updatemeta(fs, 44451860Sbostic sp, vp, start_lbp, bpp, lbp - start_lbp); 44551215Sbostic 44651342Sbostic /* Add the current file to the segment summary. */ 44751342Sbostic ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 44851215Sbostic 44951342Sbostic version = fip->fi_version; 45051860Sbostic lfs_writeseg(fs, sp); 45151915Sbostic lfs_initseg(fs, sp); 45251342Sbostic 45351215Sbostic fip = sp->fip; 45451342Sbostic fip->fi_version = version; 45551215Sbostic fip->fi_ino = ip->i_number; 45651342Sbostic start_lbp = lbp = fip->fi_blocks; 45751342Sbostic 45852682Sstaelin sp->sum_bytes_left -= 45952682Sstaelin sizeof(struct finfo) - sizeof(daddr_t); 46052682Sstaelin 46151215Sbostic bpp = sp->cbpp; 46251215Sbostic s = splbio(); 46351215Sbostic } 46452682Sstaelin 46552682Sstaelin /* Insert into the buffer list, update the FINFO block. */ 46652682Sstaelin *sp->cbpp++ = bp; 46752682Sstaelin ++fip->fi_nblocks; 46852682Sstaelin *lbp++ = bp->b_lblkno; 46952682Sstaelin 47052682Sstaelin sp->sum_bytes_left -= sizeof(daddr_t); 47152682Sstaelin sp->seg_bytes_left -= bp->b_bufsize; 47251188Sbostic } 47351215Sbostic splx(s); 47451860Sbostic lfs_updatemeta(fs, sp, vp, start_lbp, bpp, lbp - start_lbp); 47551188Sbostic } 47651188Sbostic 47751342Sbostic /* 47851342Sbostic * Update the metadata that points to the blocks listed in the FINFO 47951188Sbostic * array. 48051188Sbostic */ 48152077Sbostic void 48251860Sbostic lfs_updatemeta(fs, sp, vp, lbp, bpp, nblocks) 48351499Sbostic struct lfs *fs; 48452085Sbostic struct segment *sp; 48552085Sbostic struct vnode *vp; 48651215Sbostic daddr_t *lbp; 48752085Sbostic struct buf **bpp; 48851215Sbostic int nblocks; 48951188Sbostic { 49051915Sbostic SEGUSE *sup; 49152085Sbostic struct buf *bp; 49251860Sbostic INDIR a[NIADDR], *ap; 49352085Sbostic struct inode *ip; 49451915Sbostic daddr_t daddr, lbn, off; 49551860Sbostic int db_per_fsb, error, i, num; 49651188Sbostic 49751860Sbostic #ifdef VERBOSE 49851860Sbostic printf("lfs_updatemeta\n"); 49951860Sbostic #endif 50051342Sbostic if (nblocks == 0) 50151215Sbostic return; 50251215Sbostic 50351915Sbostic /* Sort the blocks. */ 50452077Sbostic lfs_shellsort(bpp, lbp, nblocks); 50551215Sbostic 50651915Sbostic /* 50751915Sbostic * Assign disk addresses, and update references to the logical 50851915Sbostic * block and the segment usage information. 50951915Sbostic */ 51051860Sbostic db_per_fsb = fsbtodb(fs, 1); 51151915Sbostic for (i = nblocks; i--; ++bpp) { 51251915Sbostic lbn = *lbp++; 51351915Sbostic (*bpp)->b_blkno = off = fs->lfs_offset; 51451860Sbostic fs->lfs_offset += db_per_fsb; 51551215Sbostic 51651860Sbostic if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num)) 51752085Sbostic panic("lfs_updatemeta: lfs_bmaparray %d", error); 51851860Sbostic ip = VTOI(vp); 51951860Sbostic switch (num) { 52051860Sbostic case 0: 52151915Sbostic ip->i_db[lbn] = off; 52251860Sbostic break; 52351860Sbostic case 1: 52451915Sbostic ip->i_ib[a[0].in_off] = off; 52551860Sbostic break; 52651860Sbostic default: 52751860Sbostic ap = &a[num - 1]; 52851860Sbostic if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp)) 52951860Sbostic panic("lfs_updatemeta: bread bno %d", 53051860Sbostic ap->in_lbn); 53151915Sbostic bp->b_un.b_daddr[ap->in_off] = off; 53251342Sbostic lfs_bwrite(bp); 53351188Sbostic } 53451915Sbostic 53551915Sbostic /* Update segment usage information. */ 53651915Sbostic if (daddr != UNASSIGNED) { 53751915Sbostic LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 53851915Sbostic #ifdef DIAGNOSTIC 53951915Sbostic if (sup->su_nbytes < fs->lfs_bsize) 54051915Sbostic panic("lfs: negative bytes (segment %d)\n", 54151915Sbostic datosn(fs, daddr)); 54251915Sbostic #endif 54351915Sbostic sup->su_nbytes -= fs->lfs_bsize; 54452085Sbostic LFS_UBWRITE(bp); 54551915Sbostic } 54651188Sbostic } 54751188Sbostic } 54851188Sbostic 54951915Sbostic /* 55051915Sbostic * Start a new segment. 55151915Sbostic */ 55252077Sbostic void 55351915Sbostic lfs_initseg(fs, sp) 55451499Sbostic struct lfs *fs; 55552085Sbostic struct segment *sp; 55651188Sbostic { 55751915Sbostic SEGUSE *sup; 55851915Sbostic SEGSUM *ssp; 55951915Sbostic struct buf *bp; 56051915Sbostic daddr_t lbn, *lbnp; 56151215Sbostic 56251860Sbostic #ifdef VERBOSE 56351915Sbostic printf("lfs_initseg\n"); 56451860Sbostic #endif 56551915Sbostic /* Advance to the next segment. */ 56651927Sbostic if (!LFS_PARTIAL_FITS(fs)) { 56752682Sstaelin /* Wake up any cleaning procs waiting on this file system. */ 568*52688Sbostic wakeup(&fs->lfs_nextseg); 569*52688Sbostic wakeup(&lfs_allclean_wakeup); 57052682Sstaelin 57151927Sbostic lfs_newseg(fs); 57251927Sbostic fs->lfs_offset = fs->lfs_curseg; 57351915Sbostic sp->seg_number = datosn(fs, fs->lfs_curseg); 57451915Sbostic sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE; 57551915Sbostic 57651915Sbostic /* 57751927Sbostic * If the segment contains a superblock, update the offset 57851927Sbostic * and summary address to skip over it. 57951915Sbostic */ 58052077Sbostic LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 58151927Sbostic if (sup->su_flags & SEGUSE_SUPERBLOCK) { 58251915Sbostic fs->lfs_offset += LFS_SBPAD / DEV_BSIZE; 58351915Sbostic sp->seg_bytes_left -= LFS_SBPAD; 58451215Sbostic } 58552085Sbostic brelse(bp); 58651915Sbostic } else { 58751915Sbostic sp->seg_number = datosn(fs, fs->lfs_curseg); 58851915Sbostic sp->seg_bytes_left = (fs->lfs_dbpseg - 58951915Sbostic (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE; 59051915Sbostic } 59151342Sbostic 59251915Sbostic sp->ibp = NULL; 59351915Sbostic sp->ninodes = 0; 59451342Sbostic 59551915Sbostic /* Get a new buffer for SEGSUM and enter it into the buffer list. */ 59651915Sbostic sp->cbpp = sp->bpp; 597*52688Sbostic *sp->cbpp = lfs_newbuf(fs, fs->lfs_offset, LFS_SUMMARY_SIZE); 59851915Sbostic sp->segsum = (*sp->cbpp)->b_un.b_addr; 59951915Sbostic ++sp->cbpp; 60051915Sbostic fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE; 60151342Sbostic 60251915Sbostic /* Set point to SEGSUM, initialize it. */ 60351915Sbostic ssp = sp->segsum; 60451915Sbostic ssp->ss_next = fs->lfs_nextseg; 60551915Sbostic ssp->ss_nfinfo = ssp->ss_ninos = 0; 60651342Sbostic 60751915Sbostic /* Set pointer to first FINFO, initialize it. */ 60852085Sbostic sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM)); 60951915Sbostic sp->fip->fi_nblocks = 0; 61051342Sbostic 61151915Sbostic sp->seg_bytes_left -= LFS_SUMMARY_SIZE; 61251915Sbostic sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM); 61351915Sbostic } 61451342Sbostic 61551915Sbostic /* 61651915Sbostic * Return the next segment to write. 61751915Sbostic */ 61852077Sbostic void 61951915Sbostic lfs_newseg(fs) 62051915Sbostic struct lfs *fs; 62151915Sbostic { 62251927Sbostic CLEANERINFO *cip; 62351915Sbostic SEGUSE *sup; 62451915Sbostic struct buf *bp; 62551927Sbostic int curseg, isdirty, sn; 62651915Sbostic 62751915Sbostic #ifdef VERBOSE 62851915Sbostic printf("lfs_newseg\n"); 62951915Sbostic #endif 63051927Sbostic /* 63151927Sbostic * Turn off the active bit for the current segment, turn on the 63251927Sbostic * active and dirty bits for the next segment, update the cleaner 63351927Sbostic * info. Set the current segment to the next segment, get a new 63451927Sbostic * next segment. 63551927Sbostic */ 63651927Sbostic LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_curseg), bp); 63751927Sbostic sup->su_flags &= ~SEGUSE_ACTIVE; 63852085Sbostic LFS_UBWRITE(bp); 63951927Sbostic 64051927Sbostic LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp); 64151927Sbostic sup->su_flags |= SEGUSE_ACTIVE | SEGUSE_DIRTY; 64252085Sbostic LFS_UBWRITE(bp); 64351927Sbostic 64451927Sbostic LFS_CLEANERINFO(cip, fs, bp); 64551927Sbostic --cip->clean; 64651927Sbostic ++cip->dirty; 64752085Sbostic LFS_UBWRITE(bp); 64851927Sbostic 64951927Sbostic fs->lfs_lastseg = fs->lfs_curseg; 65051927Sbostic fs->lfs_curseg = fs->lfs_nextseg; 65151927Sbostic for (sn = curseg = datosn(fs, fs->lfs_curseg);;) { 65251915Sbostic sn = (sn + 1) % fs->lfs_nseg; 65351927Sbostic if (sn == curseg) 65451915Sbostic panic("lfs_nextseg: no clean segments"); 65551915Sbostic LFS_SEGENTRY(sup, fs, sn, bp); 65651915Sbostic isdirty = sup->su_flags & SEGUSE_DIRTY; 65752085Sbostic brelse(bp); 65851915Sbostic if (!isdirty) 65951915Sbostic break; 66051915Sbostic } 66151927Sbostic fs->lfs_nextseg = sntoda(fs, sn); 66251188Sbostic } 66351188Sbostic 66452077Sbostic void 66551188Sbostic lfs_writeseg(fs, sp) 66651499Sbostic struct lfs *fs; 66752085Sbostic struct segment *sp; 66851188Sbostic { 669*52688Sbostic struct buf **bpp, *bp, *cbp; 67051188Sbostic SEGUSE *sup; 67152085Sbostic SEGSUM *ssp; 67251860Sbostic dev_t i_dev; 67351860Sbostic u_long *datap, *dp; 674*52688Sbostic size_t size; 675*52688Sbostic int ch_per_blk, i, nblocks, num, s, (*strategy)__P((struct buf *)); 676*52688Sbostic char *p; 67751188Sbostic 67851860Sbostic #ifdef VERBOSE 67951860Sbostic printf("lfs_writeseg\n"); 68051860Sbostic #endif 68152085Sbostic if ((nblocks = sp->cbpp - sp->bpp) == 0) 68252085Sbostic return; 68352085Sbostic 68451188Sbostic /* 68552085Sbostic * Compute checksum across data and then across summary; the first 68652085Sbostic * block (the summary block) is skipped. Set the create time here 68752085Sbostic * so that it's guaranteed to be later than the inode mod times. 68851860Sbostic * 68951860Sbostic * XXX 69051860Sbostic * Fix this to do it inline, instead of malloc/copy. 69151188Sbostic */ 69251860Sbostic datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK); 69351915Sbostic for (bpp = sp->bpp, i = nblocks - 1; i--;) 69451915Sbostic *dp++ = (*++bpp)->b_un.b_words[0]; 69552085Sbostic ssp = (SEGSUM *)sp->segsum; 69652103Sbostic ssp->ss_create = time.tv_sec; 69752085Sbostic ssp->ss_datasum = cksum(datap, nblocks * sizeof(u_long)); 69852085Sbostic ssp->ss_sumsum = 69952085Sbostic cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum)); 70051927Sbostic free(datap, M_SEGMENT); 70151188Sbostic 70251860Sbostic i_dev = VTOI(fs->lfs_ivnode)->i_dev; 70351860Sbostic strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy; 70451301Sbostic 705*52688Sbostic /* 706*52688Sbostic * When we simply write the blocks we lose a rotation for every block 707*52688Sbostic * written. To avoid this problem, we allocate memory in chunks, copy 708*52688Sbostic * the buffers into the chunk and write the chunk. 56K was chosen as 709*52688Sbostic * some driver/controllers can't handle unsigned 16 bit transfers. 710*52688Sbostic * When the data is copied to the chunk, turn off the the B_LOCKED bit 711*52688Sbostic * and brelse the buffer (which will move them to the LRU list). Add 712*52688Sbostic * the B_CALL flag to the buffer header so we can count I/O's for the 713*52688Sbostic * checkpoints and so we can release the allocated memory. 714*52688Sbostic * 715*52688Sbostic * XXX 716*52688Sbostic * This should be removed if the new virtual memory system allows us to 717*52688Sbostic * easily make the buffers contiguous in kernel memory and if that's 718*52688Sbostic * fast enough. 719*52688Sbostic */ 720*52688Sbostic #define LFS_CHUNKSIZE (56 * 1024) 721*52688Sbostic ch_per_blk = LFS_CHUNKSIZE / fs->lfs_bsize; 722*52688Sbostic for (bpp = sp->bpp, i = nblocks; i;) { 723*52688Sbostic num = ch_per_blk; 724*52688Sbostic if (num > i) 725*52688Sbostic num = i; 726*52688Sbostic i -= num; 727*52688Sbostic size = num * fs->lfs_bsize; 728*52688Sbostic 729*52688Sbostic cbp = lfs_newbuf(fs, (*bpp)->b_blkno, 0); 730*52688Sbostic cbp->b_dev = i_dev; 731*52688Sbostic cbp->b_flags = B_ASYNC | B_BUSY | B_CALL; 732*52688Sbostic cbp->b_iodone = lfs_callback; 733*52688Sbostic cbp->b_saveaddr = cbp->b_un.b_addr; 734*52688Sbostic cbp->b_un.b_addr = malloc(size, M_SEGMENT, M_WAITOK); 735*52688Sbostic 736*52688Sbostic s = splbio(); 737*52688Sbostic ++fs->lfs_iocount; 738*52688Sbostic for (p = cbp->b_un.b_addr; num--;) { 739*52688Sbostic bp = *bpp++; 740*52688Sbostic bcopy(bp->b_un.b_addr, p, bp->b_bcount); 741*52688Sbostic p += bp->b_bcount; 742*52688Sbostic bp->b_flags &= 743*52688Sbostic ~(B_DONE | B_ERROR | B_READ | B_DELWRI | B_LOCKED); 744*52688Sbostic if (!(bp->b_flags & B_NOCACHE)) { 745*52688Sbostic bremfree(bp); 746*52688Sbostic reassignbuf(bp, bp->b_vp); 747*52688Sbostic } 748*52688Sbostic brelse(bp); 74951860Sbostic } 750*52688Sbostic splx(s); 751*52688Sbostic cbp->b_bcount = p - cbp->b_un.b_addr; 752*52688Sbostic (strategy)(cbp); 75351860Sbostic } 75452077Sbostic 75552682Sstaelin /* Update the segment usage information. */ 75652682Sstaelin LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 75752682Sstaelin sup->su_nbytes += nblocks - 1 - 75852682Sstaelin (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs) << fs->lfs_bshift; 75952682Sstaelin sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode); 76052682Sstaelin sup->su_lastmod = time.tv_sec; 76152682Sstaelin LFS_UBWRITE(bp); 76251188Sbostic } 76351188Sbostic 76452077Sbostic void 76551860Sbostic lfs_writesuper(fs, sp) 76651499Sbostic struct lfs *fs; 76752085Sbostic struct segment *sp; 76851301Sbostic { 76952085Sbostic struct buf *bp; 77051860Sbostic dev_t i_dev; 77152085Sbostic int (*strategy) __P((struct buf *)); 77251301Sbostic 77351860Sbostic #ifdef VERBOSE 77451860Sbostic printf("lfs_writesuper\n"); 77551860Sbostic #endif 77651860Sbostic i_dev = VTOI(fs->lfs_ivnode)->i_dev; 77751860Sbostic strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy; 77851356Sbostic 77951342Sbostic /* Checksum the superblock and copy it into a buffer. */ 78051499Sbostic fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum)); 781*52688Sbostic bp = lfs_newbuf(fs, fs->lfs_sboffs[0], LFS_SBPAD); 78251860Sbostic *bp->b_un.b_lfs = *fs; 78351215Sbostic 78451356Sbostic /* Write the first superblock (wait). */ 78551860Sbostic bp->b_dev = i_dev; 78651915Sbostic bp->b_flags |= B_BUSY; 78751860Sbostic bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); 78851342Sbostic (strategy)(bp); 78951215Sbostic biowait(bp); 79051342Sbostic 79151356Sbostic /* Write the second superblock (don't wait). */ 79251215Sbostic bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1]; 79351915Sbostic bp->b_flags |= B_ASYNC | B_BUSY; 79451860Sbostic bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); 79551342Sbostic (strategy)(bp); 79651215Sbostic } 79751215Sbostic 79851342Sbostic /* 79951342Sbostic * Logical block number match routines used when traversing the dirty block 80051342Sbostic * chain. 80151342Sbostic */ 80252077Sbostic int 80352077Sbostic lfs_match_data(fs, bp) 80451860Sbostic struct lfs *fs; 80552085Sbostic struct buf *bp; 80651215Sbostic { 80751342Sbostic return (bp->b_lblkno >= 0); 80851215Sbostic } 80951215Sbostic 81052077Sbostic int 81152077Sbostic lfs_match_indir(fs, bp) 81251860Sbostic struct lfs *fs; 81352085Sbostic struct buf *bp; 81451215Sbostic { 81551860Sbostic int lbn; 81651860Sbostic 81751860Sbostic lbn = bp->b_lblkno; 81851860Sbostic return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0); 81951215Sbostic } 82051215Sbostic 82152077Sbostic int 82252077Sbostic lfs_match_dindir(fs, bp) 82351860Sbostic struct lfs *fs; 82452085Sbostic struct buf *bp; 82551215Sbostic { 82651860Sbostic int lbn; 82751860Sbostic 82851860Sbostic lbn = bp->b_lblkno; 82951860Sbostic return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1); 83051215Sbostic } 83151215Sbostic 83252077Sbostic int 83352077Sbostic lfs_match_tindir(fs, bp) 83451499Sbostic struct lfs *fs; 83552085Sbostic struct buf *bp; 83651342Sbostic { 83751860Sbostic int lbn; 83851342Sbostic 83951860Sbostic lbn = bp->b_lblkno; 84051860Sbostic return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2); 84151860Sbostic } 84251342Sbostic 84351860Sbostic /* 84451860Sbostic * Allocate a new buffer header. 84551860Sbostic */ 84652085Sbostic struct buf * 847*52688Sbostic lfs_newbuf(fs, daddr, size) 84851860Sbostic struct lfs *fs; 84951860Sbostic daddr_t daddr; 85051860Sbostic size_t size; 85151860Sbostic { 85252085Sbostic struct buf *bp; 85351342Sbostic 85451860Sbostic #ifdef VERBOSE 85551860Sbostic printf("lfs_newbuf\n"); 85651860Sbostic #endif 85751860Sbostic bp = getnewbuf(); 85851860Sbostic bremhash(bp); 85951860Sbostic bgetvp(fs->lfs_ivnode, bp); 86051860Sbostic bp->b_bcount = 0; 86151860Sbostic bp->b_lblkno = daddr; 86251860Sbostic bp->b_blkno = daddr; 86351860Sbostic bp->b_error = 0; 86451860Sbostic bp->b_resid = 0; 865*52688Sbostic if (size) 866*52688Sbostic allocbuf(bp, size); 86751860Sbostic bp->b_flags |= B_NOCACHE; 868*52688Sbostic bp->b_saveaddr = NULL; 86951915Sbostic binshash(bp, &bfreelist[BQ_AGE]); 87051860Sbostic return (bp); 87151860Sbostic } 87251342Sbostic 87352077Sbostic int /* XXX should be void */ 87451860Sbostic lfs_callback(bp) 87552085Sbostic struct buf *bp; 87651860Sbostic { 87751860Sbostic struct lfs *fs; 87851342Sbostic 87951860Sbostic fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs; 88051860Sbostic #ifdef DIAGNOSTIC 88151860Sbostic if (fs->lfs_iocount == 0) 88251860Sbostic panic("lfs_callback: zero iocount\n"); 88351860Sbostic #endif 88451860Sbostic if (--fs->lfs_iocount == 0) 885*52688Sbostic wakeup(&fs->lfs_iocount); 88651915Sbostic 887*52688Sbostic if (bp->b_saveaddr) { 888*52688Sbostic free(bp->b_un.b_addr, M_SEGMENT); 889*52688Sbostic bp->b_un.b_addr = bp->b_saveaddr; 890*52688Sbostic } 89151860Sbostic brelse(bp); 89251860Sbostic } 89351342Sbostic 89451215Sbostic /* 89551188Sbostic * Shellsort (diminishing increment sort) from Data Structures and 89651188Sbostic * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290; 89751188Sbostic * see also Knuth Vol. 3, page 84. The increments are selected from 89851188Sbostic * formula (8), page 95. Roughly O(N^3/2). 89951188Sbostic */ 90051188Sbostic /* 90151188Sbostic * This is our own private copy of shellsort because we want to sort 90251188Sbostic * two parallel arrays (the array of buffer pointers and the array of 90351188Sbostic * logical block numbers) simultaneously. Note that we cast the array 90451188Sbostic * of logical block numbers to a unsigned in this routine so that the 90551188Sbostic * negative block numbers (meta data blocks) sort AFTER the data blocks. 90651188Sbostic */ 90752077Sbostic void 90852077Sbostic lfs_shellsort(bp_array, lb_array, nmemb) 90952085Sbostic struct buf **bp_array; 91051215Sbostic daddr_t *lb_array; 91151188Sbostic register int nmemb; 91251188Sbostic { 91351188Sbostic static int __rsshell_increments[] = { 4, 1, 0 }; 91451188Sbostic register int incr, *incrp, t1, t2; 91552085Sbostic struct buf *bp_temp; 91651188Sbostic u_long lb_temp; 91751188Sbostic 91851188Sbostic for (incrp = __rsshell_increments; incr = *incrp++;) 91951188Sbostic for (t1 = incr; t1 < nmemb; ++t1) 92051188Sbostic for (t2 = t1 - incr; t2 >= 0;) 92151188Sbostic if (lb_array[t2] > lb_array[t2 + incr]) { 92251188Sbostic lb_temp = lb_array[t2]; 92351188Sbostic lb_array[t2] = lb_array[t2 + incr]; 92451188Sbostic lb_array[t2 + incr] = lb_temp; 92551188Sbostic bp_temp = bp_array[t2]; 92651188Sbostic bp_array[t2] = bp_array[t2 + incr]; 92751188Sbostic bp_array[t2 + incr] = bp_temp; 92851188Sbostic t2 -= incr; 92951188Sbostic } else 93051188Sbostic break; 93151188Sbostic } 932