/* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * %sccs.include.redist.c% * * @(#)lfs_segment.c 5.1 (Berkeley) 09/25/91 */ #include "param.h" #include "systm.h" #include "namei.h" #include "resourcevar.h" #include "kernel.h" #include "file.h" #include "stat.h" #include "buf.h" #include "proc.h" #include "conf.h" #include "vnode.h" #include "specdev.h" #include "fifo.h" #include "malloc.h" #include "mount.h" #include "../ufs/lockf.h" #include "../ufs/quota.h" #include "../ufs/inode.h" #include "../ufs/dir.h" #include "../ufs/ufsmount.h" #include "lfs.h" #include "lfs_extern.h" /* Need to write the inodes out. The indirect buffers need to be marked dirty What about sync? How do you wait on the last I/O? Need to keep vnode v_numoutput up to date for pending writes. */ static int lfs_biocallback __P((BUF *)); static void lfs_endsum __P((LFS *, SEGMENT *, int)); static BUF *lfs_newbuf __P((LFS *, daddr_t, size_t)); static SEGMENT *lfs_newseg __P((LFS *)); static void lfs_newsum __P((LFS *, SEGMENT *)); static daddr_t lfs_nextseg __P((LFS *)); static int lfs_updatemeta __P((LFS *, INODE *, FINFO *, BUF **)); static SEGMENT *lfs_writefile __P((SEGMENT *, LFS *, VNODE *)); static void lfs_writemeta __P((void)); static void lfs_writeseg __P((LFS *, SEGMENT *)); static void shellsort __P((BUF **, u_long *, register int)); /* * XXX -- when we add fragments in here, we will need to allocate a larger * buffer pointer array (sp->bpp). */ int lfs_segwrite(mp) MOUNT *mp; { FINFO *fip; /* current file info structure */ INODE *ip; LFS *fs; VNODE *vp; SEGMENT *sp; printf("lfs_segwrite: %s %s\n", mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); fs = VFSTOUFS(mp)->um_lfs; sp = lfs_newseg(fs); loop: for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ printf("lfs_segwrite: processing inum %d\n", VTOI(vp)->i_number); if (vp->v_mount != mp) goto loop; if (VOP_ISLOCKED(vp)) continue; ip = VTOI(vp); if (ip->i_number == LFS_IFILE_INUM) continue; if ((ip->i_flag & (IMOD|IACC|IUPD|ICHG)) == 0 && vp->v_dirtyblkhd == NULL) continue; if (vget(vp)) goto loop; sp = lfs_writefile(sp, fs, vp); /* Need to take care of inode now */ printf("lfs_segwrite: need to add dinode %d to seg\n", ip->i_din.di_inum); vput(vp); } /* * Force stale file system control information to be flushed. */ lfs_writeseg(fs, sp); /* vflushbuf(ump->um_devvp, waitfor == MNT_WAIT ? B_SYNC : 0); */ printf("lfs_segwrite: returning from segwrite\n"); return (0); } static int lfs_biocallback(bp) BUF *bp; { LFS *fs; SEGMENT *sp, *next_sp; UFSMOUNT *ump; VNODE *devvp; ump = VFSTOUFS(bp->b_vp->v_mount); fs = ump->um_lfs; devvp = ump->um_devvp; /* XXX splbio(); */ printf("lfs_biocallback: iocount: %d\n", fs->lfs_iocount); if (--fs->lfs_iocount) { /* Fire off summary writes */ for (sp = fs->lfs_seglist; sp; sp = next_sp) { next_sp = sp->nextp; (*(devvp->v_op->vop_strategy))(*(sp->cbpp - 1)); printf("free: segsum %x bpp %x sp %x\n", sp->segsum, sp->bpp, sp); free(sp->segsum, M_SEGMENT); free(sp->bpp, M_SEGMENT); free(sp, M_SEGMENT); } } } static void lfs_endsum(fs, sp, calc_next) LFS *fs; SEGMENT *sp; int calc_next; /* if 1, calculate next, else -1 */ { BUF *bp; SEGSUM *ssp; daddr_t next_addr; int npages, nseg_pages; printf("lfs_endsum\n"); ssp = sp->segsum; if (!calc_next) ssp->ss_nextsum = (daddr_t) -1; nseg_pages = sp->sum_num / (fs->lfs_bsize / LFS_SUMMARY_SIZE); if ((sp->sum_num % (fs->lfs_bsize / LFS_SUMMARY_SIZE)) == 0) { /* * May need to change the nextsum field on the previous * summary header in which case we need to recompute the * checksum as well. */ npages = nseg_pages + (sp->ninodes + INOPB(fs) - 1) / INOPB(fs); next_addr = fs->lfs_sboffs[0] + (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize) - fsbtodb(fs, npages) - LFS_SUMMARY_SIZE / DEV_BSIZE; if (calc_next) ssp->ss_nextsum = next_addr; ssp->ss_cksum = cksum(&ssp->ss_cksum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum)); bp = lfs_newbuf(fs, sp->sum_addr, fs->lfs_bsize); bcopy(sp->segsum, bp->b_un.b_words, fs->lfs_bsize); bp->b_flags |= B_BUSY; if (nseg_pages != 1) { bp->b_flags |= B_CALL; bp->b_iodone = lfs_biocallback; } brelse(bp); sp->bpp[fs->lfs_ssize - npages] = bp; sp->segsum = (SEGSUM *)(sp->segsum + fs->lfs_bsize - LFS_SUMMARY_SIZE); sp->sum_addr = next_addr; } else { sp->sum_addr -= LFS_SUMMARY_SIZE / DEV_BSIZE; ssp->ss_nextsum = sp->sum_addr; /* Calculate cksum on previous segment summary */ ssp->ss_cksum = cksum(&ssp->ss_cksum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum)); sp->segsum -= LFS_SUMMARY_SIZE; } } static BUF * lfs_newbuf(fs, daddr, size) LFS *fs; daddr_t daddr; size_t size; { BUF *bp; VNODE *devvp; printf("lfs_newbuf\n"); bp = getnewbuf(); bremhash(bp); /* * XXX * Need a devvp, but this isn't a particularly clean way to get one. */ devvp = VTOI(fs->lfs_ivnode)->i_devvp; bgetvp(devvp, bp); bp->b_bcount = 0; bp->b_lblkno = daddr; bp->b_blkno = daddr; bp->b_error = 0; bp->b_resid = 0; binshash(bp, BUFHASH(devvp, daddr)); allocbuf(bp, size); return (bp); } /* * Start a new segment */ static SEGMENT * lfs_newseg(fs) LFS *fs; { SEGMENT *sp; SEGUSE *sup; printf("lfs_newseg\n"); /* Get buffer space to write out a segment */ sp = malloc(sizeof(SEGMENT), M_SEGMENT, M_WAITOK); sp->cbpp = sp->bpp = malloc(fs->lfs_ssize * sizeof(BUF *), M_SEGMENT, M_WAITOK); sp->nextp = NULL; sp->sum_bytes_left = LFS_SUMMARY_SIZE; sp->seg_bytes_left = (fs->lfs_segmask + 1) - LFS_SUMMARY_SIZE; sp->saddr = fs->lfs_nextseg; sp->sum_addr = sp->saddr + sp->seg_bytes_left / DEV_BSIZE; sp->ninodes = 0; sp->sum_num = -1; sp->seg_number = (sp->saddr - fs->lfs_sboffs[0]) / fsbtodb(fs, fs->lfs_ssize); /* initialize segment summary info */ lfs_newsum(fs, sp); sup = fs->lfs_segtab + sp->seg_number; if (sup->su_nbytes != 0) { /* This is a segment containing a super block */ FINFO *fip; daddr_t lbn, *lbnp; fip = sp->fip; fip->fi_nblocks = LFS_SBPAD >> fs->lfs_bshift; fip->fi_version = 1; fip->fi_ino = LFS_UNUSED_INUM; sp->saddr += fsbtodb(fs, fip->fi_nblocks); lbnp = fip->fi_blocks; for (lbn = 0; lbn < fip->fi_nblocks; lbn++) *lbnp++ = lbn; sp->seg_bytes_left -= sup->su_nbytes; sp->sum_bytes_left -= sizeof(FINFO) + (fip->fi_nblocks - 1) * sizeof(daddr_t); sp->fip = (FINFO *)lbnp; } return(sp); } static void lfs_newsum(fs, sp) LFS *fs; SEGMENT *sp; { SEGSUM *ssp; void *sum; printf("lfs_newsum\n"); sp->sum_num++; if (sp->sum_num == 0) { sum = malloc(fs->lfs_bsize, M_SEGMENT, M_WAITOK); sp->segsum = sum + fs->lfs_bsize - LFS_SUMMARY_SIZE; ssp = sp->segsum; ssp->ss_next = fs->lfs_nextseg = lfs_nextseg(fs); ssp->ss_prev = fs->lfs_lastseg; } else { lfs_endsum(fs, sp, 1); ssp = sp->segsum; ssp->ss_next = ssp->ss_next; ssp->ss_prev = ssp->ss_prev; } /* Initialize segment summary info. */ sp->fip = (FINFO *)(sp->segsum + sizeof(SEGSUM)); ssp->ss_nextsum = (daddr_t)-1; ssp->ss_create = time.tv_sec; ssp->ss_nfinfo = 0; ssp->ss_ninos = 0; sp->sum_bytes_left -= LFS_SUMMARY_SIZE; sp->seg_bytes_left -= LFS_SUMMARY_SIZE; } #define seginc(fs, sn) ((sn + 1) % fs->lfs_nseg) static daddr_t lfs_nextseg(fs) LFS *fs; { int segnum, sn; SEGUSE *sup; printf("lfs_nextseg\n"); segnum = satosn(fs, fs->lfs_nextseg); for (sn = seginc(fs, sn); sn != segnum; sn = seginc(fs, sn)) { sup = &fs->lfs_segtab[sn]; if (!(sup->su_flags & SEGUSE_DIRTY)) break; } if (sn == segnum) panic("lfs_nextseg: file system full"); /* XXX */ return(sntosa(fs, sn)); } /* * Update the metadata that points to the blocks listed in the FIP * array. */ static lfs_updatemeta(fs, ip, fip, bpp) LFS *fs; INODE *ip; FINFO *fip; BUF **bpp; { SEGUSE *segup; BUF **lbpp, *bp; daddr_t da, iblkno; int error, i, oldsegnum; long lbn, *lbp; printf("lfs_updatemeta\n"); for (lbpp= bpp, lbp = fip->fi_blocks, i = 0; i < fip->fi_nblocks; i++, lbp++, bp++) { lbn = *lbp; if (error = lfs_bmap(ip, lbn, &da)) return(error); if (da) { oldsegnum = (da - fs->lfs_sboffs[0]) / fsbtodb(fs, fs->lfs_ssize); segup = fs->lfs_segtab+oldsegnum; segup->su_lastmod = time.tv_sec; if ((segup->su_nbytes -= fs->lfs_bsize) < 0) printf("lfs_updatemeta: negative bytes %s %d\n", "in segment", oldsegnum); } /* Now change whoever points to lbn */ if (lbn < NDADDR) ip->i_din.di_db[lbn] = (*lbpp)->b_blkno; else if ((lbn -= NDADDR) < NINDIR(fs)) { printf("lfs_updatemeta: changing indirect block %d\n", S_INDIR); error = bread(ITOV(ip), S_INDIR, fs->lfs_bsize, NOCRED, &bp); if (error) return(error); bp->b_un.b_daddr[lbn] = (*lbpp)->b_blkno; brelse(bp); } else if ( (lbn = (lbn - NINDIR(fs)) / NINDIR(fs)) < NINDIR(fs)) { iblkno = - (lbn + NIADDR + 1); printf("lfs_updatemeta: changing indirect block %d\n", iblkno); error = bread(ITOV(ip), iblkno, fs->lfs_bsize, NOCRED, &bp); if (error) return(error); bp->b_un.b_daddr[lbn % NINDIR(fs)] = (*lbpp)->b_blkno; } else return(EFBIG); } return(0); } /* * Returns 0 if the entire file fit into the current segment and * summary region, 1 if not. * XXX -- I think we need to figure out what to do if we write * the segment and find more dirty blocks when we're done. */ static SEGMENT * lfs_writefile(sp, fs, vp) SEGMENT *sp; LFS *fs; VNODE *vp; { register BUF *bp; BUF **bpp, *nbp; FINFO *fip; INODE *ip; int db_per_fsb, error, i; int ret_val, s; long *lbp; /* initialize the FINFO structure */ ip = VTOI(vp); printf("lfs_writefile: node %d\n", ip->i_number); loop: fip = sp->fip; fip->fi_nblocks = 0; fip->fi_ino = ip->i_number; fip->fi_version = lfs_getversion(fs, ip->i_number); lbp = fip->fi_blocks; bpp = sp->cbpp; s = splbio(); for (bp = vp->v_dirtyblkhd; bp; bp = nbp) { nbp = bp->b_blockf; printf("lfs_writefile: disk block num %d flags %x\n", bp->b_blkno, bp->b_flags); if ((bp->b_flags & B_BUSY)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("lfs_write: not dirty"); bremfree(bp); bp->b_flags |= (B_BUSY | B_CALL); bp->b_iodone = lfs_biocallback; /* UFS does the bawrites and bwrites here; we don't */ *lbp++ = bp->b_lblkno; /* UPDATE META HERE */ *sp->cbpp++ = bp; fip->fi_nblocks++; sp->sum_bytes_left -= sizeof(daddr_t); sp->seg_bytes_left -= bp->b_bufsize; if (sp->sum_bytes_left < sizeof(daddr_t) || sp->seg_bytes_left < fs->lfs_bsize) { /* * We are about to allocate a new summary block * and possibly a new segment. So, we need to * sort the blocks we've done so far, and assign * the disk addresses, so we can start a new block * correctly. We may be doing I/O so we need to * release the s lock before doing anything. */ splx(s); if (error = lfs_updatemeta(fs, ip, fip, bpp)) panic("lfs_writefile: error from lfs_updatemeta\n"); /* Put this file in the segment summary */ ((SEGSUM *)(sp->segsum))->ss_nfinfo++; if (sp->seg_bytes_left < fs->lfs_bsize) { lfs_writeseg(fs, sp); sp = lfs_newseg(fs); } else if (sp->sum_bytes_left < sizeof(daddr_t)) lfs_newsum(fs, sp); fip = sp->fip; s = splbio(); } } splx(s); db_per_fsb = 1 << fs->lfs_fsbtodb; shellsort(bpp, (u_long *)fip->fi_blocks, fip->fi_nblocks); for (bp = *bpp, i = 0; i < fip->fi_nblocks; i++, bp++) { bp->b_blkno = sp->saddr; sp->saddr += db_per_fsb; /* * Update the meta data now for this file. If we've filled * a segment, then we'll have to wait until the next segment * to write out the updated metadata. */ lfs_writemeta(); } (void)printf("lfs_writefile: adding %d blocks to segment\n", fip->fi_nblocks); if (fip->fi_nblocks) { ((SEGSUM *)(sp->segsum))->ss_nfinfo++; sp->fip = (FINFO *)((u_long)fip + sizeof(FINFO) + sizeof(u_long) * (fip->fi_nblocks - 1)); } return(sp); } static void lfs_writemeta() { printf("lfs_writemeta (STUB)\n"); } static void lfs_writeseg(fs, sp) LFS *fs; SEGMENT *sp; { BUF **bpp, *bp; SEGSUM *ssp; SEGUSE *sup; VNODE *devvp; int nblocks, nbuffers, ninode_blocks, nsegsums, nsum_pb; int i, metaoff, nmeta; printf("lfs_writeseg\n"); ssp = sp->segsum; nsum_pb = fs->lfs_bsize / LFS_SUMMARY_SIZE; /* * This is a hack because we're currently allocating summary segments * in full blocks. It will go away when we do fragments, when we'll * allocate fragment sized summary blocks. */ do { sp->sum_num++; lfs_endsum(fs, sp, 0); } while (sp->sum_num % nsum_pb); nbuffers = sp->cbpp - sp->bpp; nsegsums = (sp->sum_num + nsum_pb - 1) / nsum_pb; ninode_blocks = (sp->ninodes + INOPB(fs) - 1)/INOPB(fs); /* Do checksum for last segment summary */ ssp->ss_cksum = cksum(&ssp->ss_cksum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum)); /* Finish off any inodes */ /* * Copy inode and summary block buffer pointers down so they are * contiguous with the page buffer pointers */ nmeta = 1 + ninode_blocks + nsegsums; metaoff = fs->lfs_ssize - nmeta; if (sp->bpp + metaoff != sp->cbpp) bcopy(sp->bpp+metaoff, sp->cbpp, sizeof(BUF *) * nmeta); nblocks = nbuffers + ninode_blocks + nsegsums; sup = fs->lfs_segtab + sp->seg_number; sup->su_nbytes = nblocks << fs->lfs_bshift; sup->su_lastmod = time.tv_sec; sup->su_flags = SEGUSE_DIRTY; /* * Since we need to guarantee that our last buffer gets written last, * we issue the writes in two sets. The first n-1 buffers first, and * then, after they've completed, the last 1 buffer. Only when that * final write completes is the segment actually written. */ devvp = VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp; /* MIS -- THIS COULD BE BAD IF WE GOT INTERRUPTED IN THE MIDDLE OF THIS */ fs->lfs_iocount += nblocks - 1; sp->nextp = fs->lfs_seglist; fs->lfs_seglist = sp; for (bpp = sp->bpp, i = 0; i < (nblocks - 1); i++) { bp = *bpp; printf("lfs_writeseg: buffer: ino %d lbn %d flags %lx\n", VTOI(bp->b_vp)->i_number, bp->b_lblkno, bp->b_flags); (*(devvp->v_op->vop_strategy))(*bpp++); } } /* * Shellsort (diminishing increment sort) from Data Structures and * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290; * see also Knuth Vol. 3, page 84. The increments are selected from * formula (8), page 95. Roughly O(N^3/2). */ /* * This is our own private copy of shellsort because we want to sort * two parallel arrays (the array of buffer pointers and the array of * logical block numbers) simultaneously. Note that we cast the array * of logical block numbers to a unsigned in this routine so that the * negative block numbers (meta data blocks) sort AFTER the data blocks. */ static void shellsort(bp_array, lb_array, nmemb) BUF **bp_array; u_long *lb_array; register int nmemb; { static int __rsshell_increments[] = { 4, 1, 0 }; register int incr, *incrp, t1, t2; BUF *bp_temp; u_long lb_temp; for (incrp = __rsshell_increments; incr = *incrp++;) for (t1 = incr; t1 < nmemb; ++t1) for (t2 = t1 - incr; t2 >= 0;) if (lb_array[t2] > lb_array[t2 + incr]) { lb_temp = lb_array[t2]; lb_array[t2] = lb_array[t2 + incr]; lb_array[t2 + incr] = lb_temp; bp_temp = bp_array[t2]; bp_array[t2] = bp_array[t2 + incr]; bp_array[t2 + incr] = bp_temp; t2 -= incr; } else break; }