xref: /csrg-svn/sys/ufs/lfs/lfs_segment.c (revision 56159)
151188Sbostic /*
251188Sbostic  * Copyright (c) 1991 Regents of the University of California.
351188Sbostic  * All rights reserved.
451188Sbostic  *
551188Sbostic  * %sccs.include.redist.c%
651188Sbostic  *
7*56159Smargo  *	@(#)lfs_segment.c	7.34 (Berkeley) 09/02/92
851188Sbostic  */
951188Sbostic 
1051490Sbostic #include <sys/param.h>
1151490Sbostic #include <sys/systm.h>
1251490Sbostic #include <sys/namei.h>
1352085Sbostic #include <sys/kernel.h>
1451490Sbostic #include <sys/resourcevar.h>
1551490Sbostic #include <sys/file.h>
1651490Sbostic #include <sys/stat.h>
1751490Sbostic #include <sys/buf.h>
1851490Sbostic #include <sys/proc.h>
1951490Sbostic #include <sys/conf.h>
2051490Sbostic #include <sys/vnode.h>
2151490Sbostic #include <sys/malloc.h>
2251490Sbostic #include <sys/mount.h>
2351188Sbostic 
2455033Smckusick #include <miscfs/specfs/specdev.h>
2555033Smckusick #include <miscfs/fifofs/fifo.h>
2655033Smckusick 
2751499Sbostic #include <ufs/ufs/quota.h>
2851499Sbostic #include <ufs/ufs/inode.h>
2951499Sbostic #include <ufs/ufs/dir.h>
3051499Sbostic #include <ufs/ufs/ufsmount.h>
3151490Sbostic 
3251499Sbostic #include <ufs/lfs/lfs.h>
3351499Sbostic #include <ufs/lfs/lfs_extern.h>
3451490Sbostic 
/*
 * Number of active segments (fs->lfs_nactive) above which the writers in
 * this file force a checkpoint.
 */
#define	MAX_ACTIVE	10

/*
 * True when the space remaining in the current segment (in disk blocks)
 * exceeds 1 << lfs_fsbtodb, i.e. another partial segment may be started
 * here; false when the writer has to move on to a new segment.
 */
#define	LFS_PARTIAL_FITS(fs)						\
	(((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg)) >	\
	    (1 << (fs)->lfs_fsbtodb))
4351188Sbostic 
4453347Sbostic void	 lfs_callback __P((struct buf *));
4552085Sbostic void	 lfs_gather __P((struct lfs *, struct segment *,
4652085Sbostic 	     struct vnode *, int (*) __P((struct lfs *, struct buf *))));
4755940Sbostic int	 lfs_gatherblock __P((struct segment *, struct buf *, int *));
4852085Sbostic void	 lfs_initseg __P((struct lfs *, struct segment *));
4952085Sbostic void	 lfs_iset __P((struct inode *, daddr_t, time_t));
5052085Sbostic int	 lfs_match_data __P((struct lfs *, struct buf *));
5152085Sbostic int	 lfs_match_dindir __P((struct lfs *, struct buf *));
5252085Sbostic int	 lfs_match_indir __P((struct lfs *, struct buf *));
5352085Sbostic int	 lfs_match_tindir __P((struct lfs *, struct buf *));
5452077Sbostic void	 lfs_newseg __P((struct lfs *));
5552085Sbostic void	 lfs_shellsort __P((struct buf **, daddr_t *, register int));
5655940Sbostic void	 lfs_supercallback __P((struct buf *));
5756027Sbostic void	 lfs_updatemeta __P((struct segment *));
5852085Sbostic void	 lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
5954264Sbostic int	 lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
6054264Sbostic int	 lfs_writeseg __P((struct lfs *, struct segment *));
6152085Sbostic void	 lfs_writesuper __P((struct lfs *, struct segment *));
6254264Sbostic void	 lfs_writevnodes __P((struct lfs *fs, struct mount *mp,
6354264Sbostic 	    struct segment *sp, int dirops));
6451188Sbostic 
6551860Sbostic int	lfs_allclean_wakeup;		/* Cleaner wakeup address. */
6651860Sbostic 
6752328Sbostic /*
6852328Sbostic  * Ifile and meta data blocks are not marked busy, so segment writes MUST be
6952328Sbostic  * single threaded.  Currently, there are two paths into lfs_segwrite, sync()
7052328Sbostic  * and getnewbuf().  They both mark the file system busy.  Lfs_vflush()
7152328Sbostic  * explicitly marks the file system busy.  So lfs_segwrite is safe.  I think.
7252328Sbostic  */
7352328Sbostic 
7451188Sbostic int
7552328Sbostic lfs_vflush(vp)
7652328Sbostic 	struct vnode *vp;
7752328Sbostic {
7852328Sbostic 	struct inode *ip;
7952328Sbostic 	struct lfs *fs;
8052328Sbostic 	struct segment *sp;
8152328Sbostic 	int error, s;
8252328Sbostic 
8354690Sbostic 	fs = VFSTOUFS(vp->v_mount)->um_lfs;
84*56159Smargo 	if (fs->lfs_nactive > MAX_ACTIVE)
85*56159Smargo 		return(lfs_segwrite(vp->v_mount, 1));
86*56159Smargo 
8754690Sbostic 	lfs_seglock(fs);
8852328Sbostic 
8952328Sbostic 	/*
9052328Sbostic 	 * Allocate a segment structure and enough space to hold pointers to
9152328Sbostic 	 * the maximum possible number of buffers which can be described in a
9252328Sbostic 	 * single summary block.
9352328Sbostic 	 */
9452328Sbostic 	sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
9552328Sbostic 	sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
9652328Sbostic 	    sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
9752328Sbostic 	sp->seg_flags = SEGM_CKP;
9856027Sbostic 	sp->vp = NULL;
9952328Sbostic 
10052328Sbostic 	/*
10152328Sbostic 	 * Keep a cumulative count of the outstanding I/O operations.  If the
10252328Sbostic 	 * disk drive catches up with us it could go to zero before we finish,
10352328Sbostic 	 * so we artificially increment it by one until we've scheduled all of
10452328Sbostic 	 * the writes we intend to do.
10552328Sbostic 	 */
10652328Sbostic 	s = splbio();
10752688Sbostic 	++fs->lfs_iocount;
10852328Sbostic 	splx(s);
10952328Sbostic 
11052328Sbostic 	ip = VTOI(vp);
11155551Sbostic 	do {
11255803Sbostic 		lfs_initseg(fs, sp);
11355551Sbostic 		do {
11455551Sbostic 			if (vp->v_dirtyblkhd != NULL)
11555551Sbostic 				lfs_writefile(fs, sp, vp);
11655551Sbostic 		} while (lfs_writeinode(fs, sp, ip));
11752328Sbostic 
11855551Sbostic 	} while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM);
11952328Sbostic 
12052328Sbostic 	/*
12152328Sbostic 	 * If the I/O count is non-zero, sleep until it reaches zero.  At the
12252328Sbostic 	 * moment, the user's process hangs around so we can sleep.
12352328Sbostic 	 */
12452328Sbostic 	s = splbio();
12552328Sbostic 	if (--fs->lfs_iocount && (error =
12652995Sbostic 	    tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0))) {
12752995Sbostic 		free(sp->bpp, M_SEGMENT);
12852995Sbostic 		free(sp, M_SEGMENT);
12952328Sbostic 		return (error);
13052995Sbostic 	}
13152328Sbostic 	splx(s);
13254690Sbostic 	lfs_segunlock(fs);
13352328Sbostic 
13452995Sbostic 	/*
13552995Sbostic 	 * XXX
13652995Sbostic 	 * Should be writing a checkpoint?
13752995Sbostic 	 */
13852328Sbostic 	free(sp->bpp, M_SEGMENT);
13952328Sbostic 	free(sp, M_SEGMENT);
14052328Sbostic 
14152328Sbostic 	return (0);
14252328Sbostic }
14352328Sbostic 
14454264Sbostic void
14554264Sbostic lfs_writevnodes(fs, mp, sp, dirops)
14654264Sbostic 	struct lfs *fs;
14754264Sbostic 	struct mount *mp;
14854264Sbostic 	struct segment *sp;
14954264Sbostic 	int dirops;
15054264Sbostic {
15154264Sbostic 	struct inode *ip;
15254264Sbostic 	struct vnode *vp;
15354264Sbostic 	int error, s;
15454264Sbostic 
15554264Sbostic loop:	for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
15654264Sbostic 		/*
15754264Sbostic 		 * If the vnode that we are about to sync is no longer
15854264Sbostic 		 * associated with this mount point, start over.
15954264Sbostic 		 */
16054264Sbostic 		if (vp->v_mount != mp)
16154264Sbostic 			goto loop;
16254264Sbostic 
16354264Sbostic 		if (dirops && !(vp->v_flag & VDIROP) ||
16454264Sbostic 		    !dirops && (vp->v_flag & VDIROP))
16554264Sbostic 			continue;
16654264Sbostic 		/*
16754264Sbostic 		 * XXX
16854264Sbostic 		 * Up the ref count so we don't get tossed out of
16954264Sbostic 		 * memory.
17054264Sbostic 		 */
17154264Sbostic 		VREF(vp);
17254264Sbostic 
17354264Sbostic 		/*
17454264Sbostic 		 * Write the inode/file if dirty and it's not the
17554264Sbostic 		 * the IFILE.
17654264Sbostic 		 */
17754264Sbostic 		ip = VTOI(vp);
17854264Sbostic 		if ((ip->i_flag & (IMOD | IACC | IUPD | ICHG) ||
17954264Sbostic 		    vp->v_dirtyblkhd != NULL) &&
18054264Sbostic 		    ip->i_number != LFS_IFILE_INUM) {
18154264Sbostic 			if (vp->v_dirtyblkhd != NULL)
18254264Sbostic 				lfs_writefile(fs, sp, vp);
18354264Sbostic 			(void) lfs_writeinode(fs, sp, ip);
18454264Sbostic 		}
18554264Sbostic 		vp->v_flag &= ~VDIROP;
18654264Sbostic 		vrele(vp);
18754264Sbostic 	}
18854264Sbostic }
18954264Sbostic 
19052328Sbostic int
19151215Sbostic lfs_segwrite(mp, do_ckp)
19252085Sbostic 	struct mount *mp;
19351860Sbostic 	int do_ckp;			/* Do a checkpoint. */
19451188Sbostic {
19555592Sbostic 	struct buf *bp;
19652085Sbostic 	struct inode *ip;
19751499Sbostic 	struct lfs *fs;
19852085Sbostic 	struct segment *sp;
19952085Sbostic 	struct vnode *vp;
20055592Sbostic 	SEGUSE *segusep;
20155592Sbostic 	daddr_t ibno;
20255940Sbostic 	CLEANERINFO *cip;
20355940Sbostic 	int clean, error, i, s;
20451188Sbostic 
20552328Sbostic 	fs = VFSTOUFS(mp)->um_lfs;
20655940Sbostic 
20755940Sbostic  	/*
20855940Sbostic  	 * If we have fewer than 2 clean segments, wait until cleaner
20955940Sbostic 	 * writes.
21055940Sbostic  	 */
21155940Sbostic 	do {
21255940Sbostic 		LFS_CLEANERINFO(cip, fs, bp);
21355940Sbostic 		clean = cip->clean;
21455940Sbostic 		brelse(bp);
21555940Sbostic 		if (clean <= 2) {
21655940Sbostic 			printf ("segs clean: %d\n", clean);
21755940Sbostic 			wakeup(&lfs_allclean_wakeup);
21855940Sbostic 			if (error = tsleep(&fs->lfs_avail, PRIBIO + 1,
21955940Sbostic 			    "lfs writer", 0))
22055940Sbostic 				return (error);
22155940Sbostic 		}
22255940Sbostic 	} while (clean <= 2 );
22354690Sbostic 	lfs_seglock(fs);
22452085Sbostic 
22551860Sbostic 	/*
22652328Sbostic 	 * Allocate a segment structure and enough space to hold pointers to
22752328Sbostic 	 * the maximum possible number of buffers which can be described in a
22852328Sbostic 	 * single summary block.
22952328Sbostic 	 */
23055940Sbostic 	do_ckp = do_ckp || fs->lfs_nactive > MAX_ACTIVE;
23152328Sbostic 	sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
23252328Sbostic 	sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
23352328Sbostic 	    sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
23452328Sbostic 	sp->seg_flags = do_ckp ? SEGM_CKP : 0;
23556027Sbostic 	sp->vp = NULL;
23652328Sbostic 	lfs_initseg(fs, sp);
23752328Sbostic 
23852328Sbostic 	/*
23952688Sbostic 	 * Keep a cumulative count of the outstanding I/O operations.  If the
24052688Sbostic 	 * disk drive catches up with us it could go to zero before we finish,
24152688Sbostic 	 * so we artificially increment it by one until we've scheduled all of
24252688Sbostic 	 * the writes we intend to do.  If not a checkpoint, we never do the
24352688Sbostic 	 * final decrement, avoiding the wakeup in the callback routine.
24451860Sbostic 	 */
24552688Sbostic 	s = splbio();
24655551Sbostic 	++fs->lfs_iocount;
24752688Sbostic 	splx(s);
24851342Sbostic 
24954264Sbostic 	lfs_writevnodes(fs, mp, sp, 0);
25054264Sbostic 	fs->lfs_writer = 1;
25154264Sbostic 	if (fs->lfs_dirops && (error =
25254264Sbostic 	    tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) {
25354264Sbostic 		free(sp->bpp, M_SEGMENT);
25454264Sbostic 		free(sp, M_SEGMENT);
25554264Sbostic 		fs->lfs_writer = 0;
25655551Sbostic 		return (error);
25754264Sbostic 	}
25851860Sbostic 
25954264Sbostic 	lfs_writevnodes(fs, mp, sp, 1);
26051860Sbostic 
26154264Sbostic 	/*
26255592Sbostic 	 * If we are doing a checkpoint, mark everything since the
26355592Sbostic 	 * last checkpoint as no longer ACTIVE.
26454264Sbostic 	 */
26555592Sbostic 	if (do_ckp)
26655592Sbostic 		for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz;
26755592Sbostic 		     --ibno >= fs->lfs_cleansz; ) {
26855592Sbostic 			if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize,
26955592Sbostic 			    NOCRED, &bp))
27055592Sbostic 
27155592Sbostic 				panic("lfs: ifile read");
27255592Sbostic 			segusep = (SEGUSE *)bp->b_un.b_addr;
27355592Sbostic 			for (i = fs->lfs_sepb; i--; segusep++)
27455592Sbostic 				segusep->su_flags &= ~SEGUSE_ACTIVE;
27555592Sbostic 
27655940Sbostic 			error = VOP_BWRITE(bp);
27755592Sbostic 		}
27855592Sbostic 
27954264Sbostic 	if (do_ckp || fs->lfs_doifile) {
28056086Sbostic redo:
28154264Sbostic 		vp = fs->lfs_ivnode;
28254264Sbostic 		while (vget(vp));
28352328Sbostic 		ip = VTOI(vp);
28455592Sbostic 		if (vp->v_dirtyblkhd != NULL)
28555592Sbostic 			lfs_writefile(fs, sp, vp);
28655592Sbostic 		(void)lfs_writeinode(fs, sp, ip);
28752077Sbostic 		vput(vp);
28856086Sbostic 		if (lfs_writeseg(fs, sp) && do_ckp) {
28956086Sbostic 			lfs_initseg(fs, sp);
29056086Sbostic 			goto redo;
29156086Sbostic 		}
29254264Sbostic 	} else
29354264Sbostic 		(void) lfs_writeseg(fs, sp);
29451342Sbostic 
29551215Sbostic 	/*
29651860Sbostic 	 * If the I/O count is non-zero, sleep until it reaches zero.  At the
29751860Sbostic 	 * moment, the user's process hangs around so we can sleep.
29851215Sbostic 	 */
29954264Sbostic 	fs->lfs_writer = 0;
30054264Sbostic 	fs->lfs_doifile = 0;
30154264Sbostic 	wakeup(&fs->lfs_dirops);
30254264Sbostic 
30355551Sbostic 	s = splbio();
30455551Sbostic 	--fs->lfs_iocount;
30551860Sbostic 	if (do_ckp) {
30652688Sbostic 		if (fs->lfs_iocount && (error =
30752995Sbostic 		    tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs sync", 0))) {
30852995Sbostic 			free(sp->bpp, M_SEGMENT);
30952995Sbostic 			free(sp, M_SEGMENT);
31051915Sbostic 			return (error);
31152995Sbostic 		}
31251860Sbostic 		splx(s);
31355940Sbostic 		fs->lfs_nactive = 0;
31451860Sbostic 		lfs_writesuper(fs, sp);
31552688Sbostic 	} else
31652688Sbostic 		splx(s);
31751215Sbostic 
31854690Sbostic 	lfs_segunlock(fs);
31954690Sbostic 
32051927Sbostic 	free(sp->bpp, M_SEGMENT);
32151927Sbostic 	free(sp, M_SEGMENT);
32251215Sbostic 
32351860Sbostic 	return (0);
32451188Sbostic }
32551188Sbostic 
32651860Sbostic /*
32751860Sbostic  * Write the dirty blocks associated with a vnode.
32851860Sbostic  */
32952077Sbostic void
33051860Sbostic lfs_writefile(fs, sp, vp)
33151499Sbostic 	struct lfs *fs;
33252085Sbostic 	struct segment *sp;
33352085Sbostic 	struct vnode *vp;
33451188Sbostic {
33551860Sbostic 	struct buf *bp;
33652085Sbostic 	struct finfo *fip;
33751860Sbostic 	IFILE *ifp;
33851188Sbostic 
33952085Sbostic 	if (sp->seg_bytes_left < fs->lfs_bsize ||
34052085Sbostic 	    sp->sum_bytes_left < sizeof(struct finfo)) {
34154264Sbostic 		(void) lfs_writeseg(fs, sp);
34252085Sbostic 		lfs_initseg(fs, sp);
34352085Sbostic 	}
34452085Sbostic 	sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t);
34551215Sbostic 
34652085Sbostic 	fip = sp->fip;
34752085Sbostic 	fip->fi_nblocks = 0;
34852085Sbostic 	fip->fi_ino = VTOI(vp)->i_number;
34952085Sbostic 	LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
35052085Sbostic 	fip->fi_version = ifp->if_version;
35152085Sbostic 	brelse(bp);
35251188Sbostic 
35352085Sbostic 	/*
35452085Sbostic 	 * It may not be necessary to write the meta-data blocks at this point,
35552085Sbostic 	 * as the roll-forward recovery code should be able to reconstruct the
35652085Sbostic 	 * list.
35752085Sbostic 	 */
35852085Sbostic 	lfs_gather(fs, sp, vp, lfs_match_data);
35952085Sbostic 	lfs_gather(fs, sp, vp, lfs_match_indir);
36052085Sbostic 	lfs_gather(fs, sp, vp, lfs_match_dindir);
36151860Sbostic #ifdef TRIPLE
36252085Sbostic 	lfs_gather(fs, sp, vp, lfs_match_tindir);
36351860Sbostic #endif
36451342Sbostic 
36552085Sbostic 	fip = sp->fip;
36651860Sbostic #ifdef META
36752085Sbostic 	printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks);
36851860Sbostic #endif
36952085Sbostic 	if (fip->fi_nblocks != 0) {
37052085Sbostic 		++((SEGSUM *)(sp->segsum))->ss_nfinfo;
37152085Sbostic 		sp->fip =
37252085Sbostic 		    (struct finfo *)((caddr_t)fip + sizeof(struct finfo) +
37352085Sbostic 		    sizeof(daddr_t) * (fip->fi_nblocks - 1));
37455940Sbostic 		sp->start_lbp = &sp->fip->fi_blocks[0];
37552682Sstaelin 	} else
37652682Sstaelin 		sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t);
37751215Sbostic }
37851215Sbostic 
37954264Sbostic int
38051915Sbostic lfs_writeinode(fs, sp, ip)
38151915Sbostic 	struct lfs *fs;
38252085Sbostic 	struct segment *sp;
38352085Sbostic 	struct inode *ip;
38451915Sbostic {
38552085Sbostic 	struct buf *bp, *ibp;
38652077Sbostic 	IFILE *ifp;
38752682Sstaelin 	SEGUSE *sup;
38852682Sstaelin 	daddr_t daddr;
38952077Sbostic 	ino_t ino;
39055940Sbostic 	int error, ndx;
39154264Sbostic 	int redo_ifile = 0;
39251915Sbostic 
39355940Sbostic 	if (!(ip->i_flag & (IMOD | IACC | IUPD | ICHG)))
39455940Sbostic 		return;
39555940Sbostic 
39651915Sbostic 	/* Allocate a new inode block if necessary. */
39751915Sbostic 	if (sp->ibp == NULL) {
39851915Sbostic 		/* Allocate a new segment if necessary. */
39951915Sbostic 		if (sp->seg_bytes_left < fs->lfs_bsize ||
40051915Sbostic 		    sp->sum_bytes_left < sizeof(daddr_t)) {
40154264Sbostic 			(void) lfs_writeseg(fs, sp);
40251915Sbostic 			lfs_initseg(fs, sp);
40351915Sbostic 		}
40451915Sbostic 
40551915Sbostic 		/* Get next inode block. */
40652682Sstaelin 		daddr = fs->lfs_offset;
40751915Sbostic 		fs->lfs_offset += fsbtodb(fs, 1);
40851915Sbostic 		sp->ibp = *sp->cbpp++ =
40956056Sbostic 		    lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, daddr,
41056056Sbostic 		    fs->lfs_bsize);
41155940Sbostic 		++sp->start_bpp;
41255940Sbostic 		fs->lfs_avail -= fsbtodb(fs, 1);
41352688Sbostic 		/* Set remaining space counters. */
41451915Sbostic 		sp->seg_bytes_left -= fs->lfs_bsize;
41551915Sbostic 		sp->sum_bytes_left -= sizeof(daddr_t);
41652077Sbostic 		ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) -
41751915Sbostic 		    sp->ninodes / INOPB(fs) - 1;
41852682Sstaelin 		((daddr_t *)(sp->segsum))[ndx] = daddr;
41951915Sbostic 	}
42051915Sbostic 
42152085Sbostic 	/* Update the inode times and copy the inode onto the inode page. */
42256056Sbostic 	if (ip->i_flag & IMOD)
42356056Sbostic 		--fs->lfs_uinodes;
42452077Sbostic 	ITIMES(ip, &time, &time);
42555940Sbostic 	ip->i_flag &= ~(IMOD | IACC | IUPD | ICHG);
42651915Sbostic 	bp = sp->ibp;
42752085Sbostic 	bp->b_un.b_dino[sp->ninodes % INOPB(fs)] = ip->i_din;
42851915Sbostic 	/* Increment inode count in segment summary block. */
42951915Sbostic 	++((SEGSUM *)(sp->segsum))->ss_ninos;
43051915Sbostic 
43151915Sbostic 	/* If this page is full, set flag to allocate a new page. */
43251915Sbostic 	if (++sp->ninodes % INOPB(fs) == 0)
43351915Sbostic 		sp->ibp = NULL;
43451915Sbostic 
43551915Sbostic 	/*
43652077Sbostic 	 * If updating the ifile, update the super-block.  Update the disk
43752077Sbostic 	 * address and access times for this inode in the ifile.
43851915Sbostic 	 */
43952077Sbostic 	ino = ip->i_number;
44055696Sbostic 	if (ino == LFS_IFILE_INUM) {
44155696Sbostic 		daddr = fs->lfs_idaddr;
44251915Sbostic 		fs->lfs_idaddr = bp->b_blkno;
44355696Sbostic 	} else {
44455696Sbostic 		LFS_IENTRY(ifp, fs, ino, ibp);
44555696Sbostic 		daddr = ifp->if_daddr;
44655696Sbostic 		ifp->if_daddr = bp->b_blkno;
44755940Sbostic 		error = VOP_BWRITE(ibp);
44855696Sbostic 	}
44952077Sbostic 
45054264Sbostic 	/*
45154264Sbostic 	 * No need to update segment usage if there was no former inode address
45254264Sbostic 	 * or if the last inode address is in the current partial segment.
45354264Sbostic 	 */
45454264Sbostic 	if (daddr != LFS_UNUSED_DADDR &&
45555803Sbostic 	    !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) {
45652682Sstaelin 		LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
45752682Sstaelin #ifdef DIAGNOSTIC
45854264Sbostic 		if (sup->su_nbytes < sizeof(struct dinode)) {
45952819Sbostic 			/* XXX -- Change to a panic. */
46052819Sbostic 			printf("lfs: negative bytes (segment %d)\n",
46152682Sstaelin 			    datosn(fs, daddr));
46254264Sbostic 			panic("negative bytes");
46354264Sbostic 		}
46452682Sstaelin #endif
46552682Sstaelin 		sup->su_nbytes -= sizeof(struct dinode);
46656069Sbostic 		redo_ifile =
46756069Sbostic 		    (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
46855940Sbostic 		error = VOP_BWRITE(bp);
46952682Sstaelin 	}
47055551Sbostic 	return (redo_ifile);
47151915Sbostic }
47251915Sbostic 
47355940Sbostic int
47455940Sbostic lfs_gatherblock(sp, bp, sptr)
47555940Sbostic 	struct segment *sp;
47655940Sbostic 	struct buf *bp;
47755940Sbostic 	int *sptr;
47855940Sbostic {
47955940Sbostic 	struct lfs *fs;
48055940Sbostic 	int version;
48155940Sbostic 
48255940Sbostic 	/*
48355940Sbostic 	 * If full, finish this segment.  We may be doing I/O, so
48455940Sbostic 	 * release and reacquire the splbio().
48555940Sbostic 	 */
48656027Sbostic #ifdef DIAGNOSTIC
48756027Sbostic 	if (sp->vp == NULL)
48856027Sbostic 		panic ("lfs_gatherblock: Null vp in segment");
48956027Sbostic #endif
49055940Sbostic 	fs = sp->fs;
49155940Sbostic 	if (sp->sum_bytes_left < sizeof(daddr_t) ||
49255940Sbostic 	    sp->seg_bytes_left < fs->lfs_bsize) {
49355940Sbostic 		if (sptr)
49455940Sbostic 			splx(*sptr);
49556027Sbostic 		lfs_updatemeta(sp);
49655940Sbostic 
49755940Sbostic 		/* Add the current file to the segment summary. */
49855940Sbostic 		++((SEGSUM *)(sp->segsum))->ss_nfinfo;
49955940Sbostic 
50055940Sbostic 		version = sp->fip->fi_version;
50155940Sbostic 		(void) lfs_writeseg(fs, sp);
50255940Sbostic 		lfs_initseg(fs, sp);
50355940Sbostic 
50455940Sbostic 		sp->fip->fi_version = version;
50556027Sbostic 		sp->fip->fi_ino = VTOI(sp->vp)->i_number;
50655940Sbostic 
50755940Sbostic 		sp->sum_bytes_left -=
50855940Sbostic 		    sizeof(struct finfo) - sizeof(daddr_t);
50955940Sbostic 
51055940Sbostic 		if (sptr)
51155940Sbostic 			*sptr = splbio();
51255940Sbostic 		return(1);
51355940Sbostic 	}
51455940Sbostic 
51555940Sbostic 	/* Insert into the buffer list, update the FINFO block. */
51656056Sbostic if (bp->b_vp == sp->fs->lfs_ivnode &&
51756056Sbostic ((bp->b_lblkno == 0 && (bp->b_un.b_daddr[0] > 26 || bp->b_un.b_daddr[1] > 26)) ||
51856056Sbostic (bp->b_lblkno > 2)))
51956056Sbostic 	printf ("Bad ifile block\n");
52055940Sbostic 	bp->b_flags |= B_GATHERED;
52155940Sbostic 	*sp->cbpp++ = bp;
52255940Sbostic 	sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno;
52355940Sbostic 
52455940Sbostic 	sp->sum_bytes_left -= sizeof(daddr_t);
52555940Sbostic 	sp->seg_bytes_left -= bp->b_bufsize;
52655940Sbostic 	return(0);
52755940Sbostic }
52855940Sbostic 
52952077Sbostic void
53051215Sbostic lfs_gather(fs, sp, vp, match)
53151499Sbostic 	struct lfs *fs;
53252085Sbostic 	struct segment *sp;
53352085Sbostic 	struct vnode *vp;
53452085Sbostic 	int (*match) __P((struct lfs *, struct buf *));
53551215Sbostic {
53655940Sbostic 	struct buf *bp;
53751342Sbostic 	int s;
53851215Sbostic 
53956027Sbostic 	sp->vp = vp;
54055940Sbostic 	s = splbio();
54155940Sbostic loop:	for (bp = vp->v_dirtyblkhd; bp; bp = bp->b_blockf) {
54254264Sbostic 		if (bp->b_flags & B_BUSY || !match(fs, bp) ||
54354264Sbostic 		    bp->b_flags & B_GATHERED)
54451215Sbostic 			continue;
54551342Sbostic #ifdef DIAGNOSTIC
54651860Sbostic 		if (!(bp->b_flags & B_DELWRI))
54751915Sbostic 			panic("lfs_gather: bp not B_DELWRI");
54851860Sbostic 		if (!(bp->b_flags & B_LOCKED))
54951915Sbostic 			panic("lfs_gather: bp not B_LOCKED");
55051342Sbostic #endif
55155940Sbostic 		if (lfs_gatherblock(sp, bp, &s))
55253145Sstaelin 			goto loop;
55351188Sbostic 	}
55451215Sbostic 	splx(s);
55556027Sbostic 	lfs_updatemeta(sp);
55656027Sbostic 	sp->vp = NULL;
55751188Sbostic }
55851188Sbostic 
55955940Sbostic 
56051342Sbostic /*
56151342Sbostic  * Update the metadata that points to the blocks listed in the FINFO
56251188Sbostic  * array.
56351188Sbostic  */
56452077Sbostic void
56556027Sbostic lfs_updatemeta(sp)
56652085Sbostic 	struct segment *sp;
56751188Sbostic {
56851915Sbostic 	SEGUSE *sup;
56952085Sbostic 	struct buf *bp;
57055940Sbostic 	struct lfs *fs;
57156027Sbostic 	struct vnode *vp;
57251860Sbostic 	INDIR a[NIADDR], *ap;
57352085Sbostic 	struct inode *ip;
57451915Sbostic 	daddr_t daddr, lbn, off;
57555940Sbostic 	int db_per_fsb, error, i, nblocks, num;
57651188Sbostic 
57756027Sbostic 	vp = sp->vp;
57855940Sbostic 	nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp;
57956027Sbostic 	if (vp == NULL || nblocks == 0)
58051215Sbostic 		return;
58151215Sbostic 
58251915Sbostic 	/* Sort the blocks. */
58355940Sbostic 	if (!(sp->seg_flags & SEGM_CLEAN))
58455940Sbostic 		lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks);
58551215Sbostic 
58651915Sbostic 	/*
58751915Sbostic 	 * Assign disk addresses, and update references to the logical
58851915Sbostic 	 * block and the segment usage information.
58951915Sbostic 	 */
59055940Sbostic 	fs = sp->fs;
59151860Sbostic 	db_per_fsb = fsbtodb(fs, 1);
59255940Sbostic 	for (i = nblocks; i--; ++sp->start_bpp) {
59355940Sbostic 		lbn = *sp->start_lbp++;
59455940Sbostic 		(*sp->start_bpp)->b_blkno = off = fs->lfs_offset;
59551860Sbostic 		fs->lfs_offset += db_per_fsb;
59651215Sbostic 
59751860Sbostic 		if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num))
59852085Sbostic 			panic("lfs_updatemeta: lfs_bmaparray %d", error);
59951860Sbostic 		ip = VTOI(vp);
60051860Sbostic 		switch (num) {
60151860Sbostic 		case 0:
60251915Sbostic 			ip->i_db[lbn] = off;
60351860Sbostic 			break;
60451860Sbostic 		case 1:
60551915Sbostic 			ip->i_ib[a[0].in_off] = off;
60651860Sbostic 			break;
60751860Sbostic 		default:
60851860Sbostic 			ap = &a[num - 1];
60951860Sbostic 			if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
61051860Sbostic 				panic("lfs_updatemeta: bread bno %d",
61151860Sbostic 				    ap->in_lbn);
61255458Sbostic 			/*
61355458Sbostic 			 * Bread may create a new indirect block which needs
61455458Sbostic 			 * to get counted for the inode.
61555458Sbostic 			 */
61655592Sbostic 			if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) {
61755940Sbostic printf ("Updatemeta allocating indirect block: shouldn't happen\n");
61855458Sbostic 				ip->i_blocks += btodb(fs->lfs_bsize);
61955592Sbostic 				fs->lfs_bfree -= btodb(fs->lfs_bsize);
62055592Sbostic 			}
62151915Sbostic 			bp->b_un.b_daddr[ap->in_off] = off;
62253530Sheideman 			VOP_BWRITE(bp);
62351188Sbostic 		}
62451915Sbostic 
62551915Sbostic 		/* Update segment usage information. */
62651915Sbostic 		if (daddr != UNASSIGNED) {
62751915Sbostic 			LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
62851915Sbostic #ifdef DIAGNOSTIC
62954264Sbostic 			if (sup->su_nbytes < fs->lfs_bsize) {
63052819Sbostic 				/* XXX -- Change to a panic. */
63152819Sbostic 				printf("lfs: negative bytes (segment %d)\n",
63251915Sbostic 				    datosn(fs, daddr));
63354264Sbostic 				panic ("Negative Bytes");
63454264Sbostic 			}
63551915Sbostic #endif
63651915Sbostic 			sup->su_nbytes -= fs->lfs_bsize;
63755940Sbostic 			error = VOP_BWRITE(bp);
63851915Sbostic 		}
63951188Sbostic 	}
64051188Sbostic }
64151188Sbostic 
64251915Sbostic /*
64351915Sbostic  * Start a new segment.
64451915Sbostic  */
64552077Sbostic void
64651915Sbostic lfs_initseg(fs, sp)
64751499Sbostic 	struct lfs *fs;
64852085Sbostic 	struct segment *sp;
64951188Sbostic {
65051915Sbostic 	SEGUSE *sup;
65151915Sbostic 	SEGSUM *ssp;
65251915Sbostic 	struct buf *bp;
65351915Sbostic 	daddr_t lbn, *lbnp;
65451215Sbostic 
65551915Sbostic 	/* Advance to the next segment. */
65651927Sbostic 	if (!LFS_PARTIAL_FITS(fs)) {
65752682Sstaelin 		/* Wake up any cleaning procs waiting on this file system. */
65852688Sbostic 		wakeup(&fs->lfs_nextseg);
65952688Sbostic 		wakeup(&lfs_allclean_wakeup);
66052682Sstaelin 
66151927Sbostic 		lfs_newseg(fs);
66251927Sbostic 		fs->lfs_offset = fs->lfs_curseg;
66351915Sbostic 		sp->seg_number = datosn(fs, fs->lfs_curseg);
66451915Sbostic 		sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;
66551915Sbostic 
66651915Sbostic 		/*
66751927Sbostic 		 * If the segment contains a superblock, update the offset
66851927Sbostic 		 * and summary address to skip over it.
66951915Sbostic 		 */
67052077Sbostic 		LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
67151927Sbostic 		if (sup->su_flags & SEGUSE_SUPERBLOCK) {
67251915Sbostic 			fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
67351915Sbostic 			sp->seg_bytes_left -= LFS_SBPAD;
67451215Sbostic 		}
67552085Sbostic 		brelse(bp);
67651915Sbostic 	} else {
67751915Sbostic 		sp->seg_number = datosn(fs, fs->lfs_curseg);
67851915Sbostic 		sp->seg_bytes_left = (fs->lfs_dbpseg -
67951915Sbostic 		    (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
68051915Sbostic 	}
68154264Sbostic 	fs->lfs_lastpseg = fs->lfs_offset;
68251342Sbostic 
68355940Sbostic 	sp->fs = fs;
68451915Sbostic 	sp->ibp = NULL;
68551915Sbostic 	sp->ninodes = 0;
68651342Sbostic 
68751915Sbostic 	/* Get a new buffer for SEGSUM and enter it into the buffer list. */
68851915Sbostic 	sp->cbpp = sp->bpp;
68956056Sbostic 	*sp->cbpp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_offset,
69056056Sbostic 	     LFS_SUMMARY_SIZE);
69151915Sbostic 	sp->segsum = (*sp->cbpp)->b_un.b_addr;
69255940Sbostic 	sp->start_bpp = ++sp->cbpp;
69351915Sbostic 	fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;
69451342Sbostic 
69551915Sbostic 	/* Set point to SEGSUM, initialize it. */
69651915Sbostic 	ssp = sp->segsum;
69751915Sbostic 	ssp->ss_next = fs->lfs_nextseg;
69851915Sbostic 	ssp->ss_nfinfo = ssp->ss_ninos = 0;
69951342Sbostic 
70051915Sbostic 	/* Set pointer to first FINFO, initialize it. */
70152085Sbostic 	sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM));
70251915Sbostic 	sp->fip->fi_nblocks = 0;
70355940Sbostic 	sp->start_lbp = &sp->fip->fi_blocks[0];
70451342Sbostic 
70551915Sbostic 	sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
70651915Sbostic 	sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);
70751915Sbostic }
70851342Sbostic 
70951915Sbostic /*
71051915Sbostic  * Return the next segment to write.
71151915Sbostic  */
71252077Sbostic void
71351915Sbostic lfs_newseg(fs)
71451915Sbostic 	struct lfs *fs;
71551915Sbostic {
71651927Sbostic 	CLEANERINFO *cip;
71751915Sbostic 	SEGUSE *sup;
71851915Sbostic 	struct buf *bp;
71955940Sbostic 	int curseg, error, isdirty, sn;
72051915Sbostic 
72155592Sbostic         LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
722*56159Smargo         sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
72356056Sbostic 	sup->su_nbytes = 0;
72456056Sbostic 	sup->su_nsums = 0;
72556056Sbostic 	sup->su_ninos = 0;
72655940Sbostic         (void) VOP_BWRITE(bp);
72751927Sbostic 
72851927Sbostic 	LFS_CLEANERINFO(cip, fs, bp);
72951927Sbostic 	--cip->clean;
73051927Sbostic 	++cip->dirty;
73155940Sbostic 	(void) VOP_BWRITE(bp);
73251927Sbostic 
73351927Sbostic 	fs->lfs_lastseg = fs->lfs_curseg;
73451927Sbostic 	fs->lfs_curseg = fs->lfs_nextseg;
73551927Sbostic 	for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
73651915Sbostic 		sn = (sn + 1) % fs->lfs_nseg;
73751927Sbostic 		if (sn == curseg)
73851915Sbostic 			panic("lfs_nextseg: no clean segments");
73951915Sbostic 		LFS_SEGENTRY(sup, fs, sn, bp);
74051915Sbostic 		isdirty = sup->su_flags & SEGUSE_DIRTY;
74152085Sbostic 		brelse(bp);
74251915Sbostic 		if (!isdirty)
74351915Sbostic 			break;
74451915Sbostic 	}
74555592Sbostic 
74655940Sbostic 	++fs->lfs_nactive;
74751927Sbostic 	fs->lfs_nextseg = sntoda(fs, sn);
74851188Sbostic }
74951188Sbostic 
75054264Sbostic int
75151188Sbostic lfs_writeseg(fs, sp)
75251499Sbostic 	struct lfs *fs;
75352085Sbostic 	struct segment *sp;
75451188Sbostic {
75555940Sbostic 	extern int locked_queue_count;
75652688Sbostic 	struct buf **bpp, *bp, *cbp;
75751188Sbostic 	SEGUSE *sup;
75852085Sbostic 	SEGSUM *ssp;
75951860Sbostic 	dev_t i_dev;
76054264Sbostic 	size_t size;
76151860Sbostic 	u_long *datap, *dp;
76255940Sbostic 	int ch_per_blk, do_again, error, i, nblocks, num, s;
76354264Sbostic 	int (*strategy)__P((struct vop_strategy_args *));
76454690Sbostic 	struct vop_strategy_args vop_strategy_a;
76555592Sbostic 	u_short ninos;
76652688Sbostic 	char *p;
76751188Sbostic 
76855940Sbostic 	/*
76955940Sbostic 	 * If there are no buffers other than the segment summary to write
77055940Sbostic 	 * and it is not a checkpoint, don't do anything.  On a checkpoint,
77155940Sbostic 	 * even if there aren't any buffers, you need to write the superblock.
77255940Sbostic 	 */
77355940Sbostic 	if ((nblocks = sp->cbpp - sp->bpp) == 1 && !(sp->seg_flags & SEGM_CKP))
77455551Sbostic 		return (0);
77552085Sbostic 
776*56159Smargo 	ssp = (SEGSUM *)sp->segsum;
777*56159Smargo 
778*56159Smargo 	/* Update the segment usage information. */
779*56159Smargo 	LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
780*56159Smargo 	ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs);
781*56159Smargo 	sup->su_nbytes += nblocks - 1 - ninos << fs->lfs_bshift;
782*56159Smargo 	sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode);
783*56159Smargo 	sup->su_nbytes += LFS_SUMMARY_SIZE;
784*56159Smargo 	sup->su_lastmod = time.tv_sec;
785*56159Smargo 	sup->su_ninos += ninos;
786*56159Smargo 	++sup->su_nsums;
787*56159Smargo 	do_again = !(bp->b_flags & B_GATHERED);
788*56159Smargo 	(void)VOP_BWRITE(bp);
78951188Sbostic 	/*
79052085Sbostic 	 * Compute checksum across data and then across summary; the first
79152085Sbostic 	 * block (the summary block) is skipped.  Set the create time here
79252085Sbostic 	 * so that it's guaranteed to be later than the inode mod times.
79351860Sbostic 	 *
79451860Sbostic 	 * XXX
79551860Sbostic 	 * Fix this to do it inline, instead of malloc/copy.
79651188Sbostic 	 */
79751860Sbostic 	datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
798*56159Smargo 	for (bpp = sp->bpp, i = nblocks - 1; i--;) {
799*56159Smargo 		if ((*++bpp)->b_flags & B_INVAL) {
800*56159Smargo 			if (copyin((*bpp)->b_saveaddr, dp++, sizeof(u_long)))
801*56159Smargo 				panic("lfs_writeseg: copyin failed");
802*56159Smargo 		} else
803*56159Smargo 			*dp++ = (*bpp)->b_un.b_words[0];
804*56159Smargo 	}
80552103Sbostic 	ssp->ss_create = time.tv_sec;
80655803Sbostic 	ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long));
80752085Sbostic 	ssp->ss_sumsum =
80852085Sbostic 	    cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
80951927Sbostic 	free(datap, M_SEGMENT);
810*56159Smargo #ifdef DIAGNOSTIC
811*56159Smargo 	if (fs->lfs_bfree < fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE)
812*56159Smargo 		panic("lfs_writeseg: No diskspace for summary");
813*56159Smargo #endif
81455592Sbostic 	fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE);
81554264Sbostic 
81651860Sbostic 	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
81753574Sheideman 	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];
81851301Sbostic 
81952688Sbostic 	/*
82052688Sbostic 	 * When we simply write the blocks we lose a rotation for every block
82152688Sbostic 	 * written.  To avoid this problem, we allocate memory in chunks, copy
82252688Sbostic 	 * the buffers into the chunk and write the chunk.  56K was chosen as
82352688Sbostic 	 * some driver/controllers can't handle unsigned 16 bit transfers.
82452688Sbostic 	 * When the data is copied to the chunk, turn off the the B_LOCKED bit
82552688Sbostic 	 * and brelse the buffer (which will move them to the LRU list).  Add
82652688Sbostic 	 * the B_CALL flag to the buffer header so we can count I/O's for the
82752688Sbostic 	 * checkpoints and so we can release the allocated memory.
82852688Sbostic 	 *
82952688Sbostic 	 * XXX
83052688Sbostic 	 * This should be removed if the new virtual memory system allows us to
83152688Sbostic 	 * easily make the buffers contiguous in kernel memory and if that's
83252688Sbostic 	 * fast enough.
83352688Sbostic 	 */
83452688Sbostic #define	LFS_CHUNKSIZE	(56 * 1024)
83552688Sbostic 	ch_per_blk = LFS_CHUNKSIZE / fs->lfs_bsize;
83652688Sbostic 	for (bpp = sp->bpp, i = nblocks; i;) {
83752688Sbostic 		num = ch_per_blk;
83852688Sbostic 		if (num > i)
83952688Sbostic 			num = i;
84052688Sbostic 		i -= num;
84152688Sbostic 		size = num * fs->lfs_bsize;
84252688Sbostic 
84356056Sbostic 		cbp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp,
84456056Sbostic 		    (*bpp)->b_blkno, size);
84552688Sbostic 		cbp->b_dev = i_dev;
84655940Sbostic 		cbp->b_flags |= B_ASYNC | B_BUSY;
84752688Sbostic 
84852688Sbostic 		s = splbio();
84952688Sbostic 		++fs->lfs_iocount;
85052688Sbostic 		for (p = cbp->b_un.b_addr; num--;) {
85152688Sbostic 			bp = *bpp++;
85255940Sbostic 			/*
85355940Sbostic 			 * Fake buffers from the cleaner are marked as B_INVAL.
85455940Sbostic 			 * We need to copy the data from user space rather than
85555940Sbostic 			 * from the buffer indicated.
85655940Sbostic 			 * XXX == what do I do on an error?
85755940Sbostic 			 */
85855940Sbostic 			if (bp->b_flags & B_INVAL) {
85955940Sbostic 				if (copyin(bp->b_saveaddr, p, bp->b_bcount))
86055940Sbostic 					panic("lfs_writeseg: copyin failed");
86155940Sbostic 			} else
86255940Sbostic 				bcopy(bp->b_un.b_addr, p, bp->b_bcount);
86352688Sbostic 			p += bp->b_bcount;
86455940Sbostic 			if (bp->b_flags & B_LOCKED)
86555940Sbostic 				--locked_queue_count;
86655940Sbostic 			bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
86754264Sbostic 			     B_LOCKED | B_GATHERED);
86855940Sbostic 			if (bp->b_flags & B_CALL) {
86955940Sbostic 				/* if B_CALL, it was created with newbuf */
87055940Sbostic 				brelvp(bp);
87155940Sbostic 				free(bp, M_SEGMENT);
87255940Sbostic 			} else {
87352688Sbostic 				bremfree(bp);
87452688Sbostic 				reassignbuf(bp, bp->b_vp);
87555940Sbostic 				brelse(bp);
87652688Sbostic 			}
87751860Sbostic 		}
87856069Sbostic 		++cbp->b_vp->v_numoutput;
87952688Sbostic 		splx(s);
88052688Sbostic 		cbp->b_bcount = p - cbp->b_un.b_addr;
88156056Sbostic 		/*
88256056Sbostic 		 * XXXX This is a gross and disgusting hack.  Since these
88356056Sbostic 		 * buffers are physically addressed, they hang off the
88456056Sbostic 		 * device vnode (devvp).  As a result, they have no way
88556056Sbostic 		 * of getting to the LFS superblock or lfs structure to
88656056Sbostic 		 * keep track of the number of I/O's pending.  So, I am
88756056Sbostic 		 * going to stuff the fs into the saveaddr field of
88856056Sbostic 		 * the buffer (yuk).
88956056Sbostic 		 */
89056056Sbostic 		cbp->b_saveaddr = (caddr_t)fs;
89153574Sheideman 		vop_strategy_a.a_desc = VDESC(vop_strategy);
89253574Sheideman 		vop_strategy_a.a_bp = cbp;
89353574Sheideman 		(strategy)(&vop_strategy_a);
89451860Sbostic 	}
89555551Sbostic 	return (do_again);
89651188Sbostic }
89751188Sbostic 
89852077Sbostic void
89951860Sbostic lfs_writesuper(fs, sp)
90051499Sbostic 	struct lfs *fs;
90152085Sbostic 	struct segment *sp;
90251301Sbostic {
90352085Sbostic 	struct buf *bp;
90451860Sbostic 	dev_t i_dev;
90553574Sheideman 	int (*strategy) __P((struct vop_strategy_args *));
90656069Sbostic 	int s;
90754690Sbostic 	struct vop_strategy_args vop_strategy_a;
90851301Sbostic 
90951860Sbostic 	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
91053574Sheideman 	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];
91151356Sbostic 
91251342Sbostic 	/* Checksum the superblock and copy it into a buffer. */
91351499Sbostic 	fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
91456056Sbostic 	bp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_sboffs[0],
91556056Sbostic 	    LFS_SBPAD);
91651860Sbostic 	*bp->b_un.b_lfs = *fs;
91751215Sbostic 
91851356Sbostic 	/* Write the first superblock (wait). */
91951860Sbostic 	bp->b_dev = i_dev;
92051915Sbostic 	bp->b_flags |= B_BUSY;
92155940Sbostic 	bp->b_flags &= ~(B_DONE | B_CALL | B_ERROR | B_READ | B_DELWRI);
92253574Sheideman 	vop_strategy_a.a_desc = VDESC(vop_strategy);
92353574Sheideman 	vop_strategy_a.a_bp = bp;
92456069Sbostic 	s = splbio();
92556069Sbostic 	bp->b_vp->v_numoutput += 2;
92656069Sbostic 	splx(s);
92753574Sheideman 	(strategy)(&vop_strategy_a);
92851215Sbostic 	biowait(bp);
92951342Sbostic 
93051356Sbostic 	/* Write the second superblock (don't wait). */
93151215Sbostic 	bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1];
93255940Sbostic 	bp->b_flags |= B_CALL | B_ASYNC | B_BUSY;
93351860Sbostic 	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
93455940Sbostic 	bp->b_iodone = lfs_supercallback;
93553574Sheideman 	(strategy)(&vop_strategy_a);
93651215Sbostic }
93751215Sbostic 
93851342Sbostic /*
93951342Sbostic  * Logical block number match routines used when traversing the dirty block
94051342Sbostic  * chain.
94151342Sbostic  */
94252077Sbostic int
94352077Sbostic lfs_match_data(fs, bp)
94451860Sbostic 	struct lfs *fs;
94552085Sbostic 	struct buf *bp;
94651215Sbostic {
94751342Sbostic 	return (bp->b_lblkno >= 0);
94851215Sbostic }
94951215Sbostic 
95052077Sbostic int
95152077Sbostic lfs_match_indir(fs, bp)
95251860Sbostic 	struct lfs *fs;
95352085Sbostic 	struct buf *bp;
95451215Sbostic {
95551860Sbostic 	int lbn;
95651860Sbostic 
95751860Sbostic 	lbn = bp->b_lblkno;
95851860Sbostic 	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
95951215Sbostic }
96051215Sbostic 
96152077Sbostic int
96252077Sbostic lfs_match_dindir(fs, bp)
96351860Sbostic 	struct lfs *fs;
96452085Sbostic 	struct buf *bp;
96551215Sbostic {
96651860Sbostic 	int lbn;
96751860Sbostic 
96851860Sbostic 	lbn = bp->b_lblkno;
96951860Sbostic 	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
97051215Sbostic }
97151215Sbostic 
97252077Sbostic int
97352077Sbostic lfs_match_tindir(fs, bp)
97451499Sbostic 	struct lfs *fs;
97552085Sbostic 	struct buf *bp;
97651342Sbostic {
97751860Sbostic 	int lbn;
97851342Sbostic 
97951860Sbostic 	lbn = bp->b_lblkno;
98051860Sbostic 	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
98151860Sbostic }
98251342Sbostic 
98351860Sbostic /*
98451860Sbostic  * Allocate a new buffer header.
98551860Sbostic  */
98652085Sbostic struct buf *
98755940Sbostic lfs_newbuf(vp, daddr, size)
98855940Sbostic 	struct vnode *vp;
98951860Sbostic 	daddr_t daddr;
99051860Sbostic 	size_t size;
99151860Sbostic {
99252085Sbostic 	struct buf *bp;
99355940Sbostic 	size_t nbytes;
99451342Sbostic 
99555940Sbostic 	nbytes = roundup(size, DEV_BSIZE);
99655940Sbostic 	bp = malloc(sizeof(struct buf) + nbytes, M_SEGMENT, M_WAITOK);
99756069Sbostic 	bzero(bp, sizeof(struct buf) + nbytes);
99855940Sbostic 	bgetvp(vp, bp);
99955940Sbostic 	bp->b_un.b_addr = (caddr_t)(bp + 1);
100055940Sbostic 	bp->b_bufsize = size;
100155940Sbostic 	bp->b_bcount = size;
100251860Sbostic 	bp->b_lblkno = daddr;
100351860Sbostic 	bp->b_blkno = daddr;
100451860Sbostic 	bp->b_error = 0;
100551860Sbostic 	bp->b_resid = 0;
100655940Sbostic 	bp->b_iodone = lfs_callback;
100756027Sbostic 	bp->b_flags |= B_BUSY | B_CALL | B_NOCACHE;
100851860Sbostic 	return (bp);
100951860Sbostic }
101051342Sbostic 
101153347Sbostic void
101251860Sbostic lfs_callback(bp)
101352085Sbostic 	struct buf *bp;
101451860Sbostic {
101551860Sbostic 	struct lfs *fs;
101651342Sbostic 
101756056Sbostic 	fs = (struct lfs *)bp->b_saveaddr;
101851860Sbostic #ifdef DIAGNOSTIC
101951860Sbostic 	if (fs->lfs_iocount == 0)
102051860Sbostic 		panic("lfs_callback: zero iocount\n");
102151860Sbostic #endif
102251860Sbostic 	if (--fs->lfs_iocount == 0)
102352688Sbostic 		wakeup(&fs->lfs_iocount);
102451915Sbostic 
102555940Sbostic 	brelvp(bp);
102655940Sbostic 	free(bp, M_SEGMENT);
102751860Sbostic }
102851342Sbostic 
102955940Sbostic void
103055940Sbostic lfs_supercallback(bp)
103155940Sbostic 	struct buf *bp;
103255940Sbostic {
103355940Sbostic 	brelvp(bp);
103455940Sbostic 	free(bp, M_SEGMENT);
103555940Sbostic }
103655940Sbostic 
103751215Sbostic /*
103851188Sbostic  * Shellsort (diminishing increment sort) from Data Structures and
103951188Sbostic  * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290;
104051188Sbostic  * see also Knuth Vol. 3, page 84.  The increments are selected from
104151188Sbostic  * formula (8), page 95.  Roughly O(N^3/2).
104251188Sbostic  */
104351188Sbostic /*
104451188Sbostic  * This is our own private copy of shellsort because we want to sort
104551188Sbostic  * two parallel arrays (the array of buffer pointers and the array of
104651188Sbostic  * logical block numbers) simultaneously.  Note that we cast the array
104751188Sbostic  * of logical block numbers to a unsigned in this routine so that the
104851188Sbostic  * negative block numbers (meta data blocks) sort AFTER the data blocks.
104951188Sbostic  */
105052077Sbostic void
105152077Sbostic lfs_shellsort(bp_array, lb_array, nmemb)
105252085Sbostic 	struct buf **bp_array;
105351215Sbostic 	daddr_t *lb_array;
105451188Sbostic 	register int nmemb;
105551188Sbostic {
105651188Sbostic 	static int __rsshell_increments[] = { 4, 1, 0 };
105751188Sbostic 	register int incr, *incrp, t1, t2;
105852085Sbostic 	struct buf *bp_temp;
105951188Sbostic 	u_long lb_temp;
106051188Sbostic 
106151188Sbostic 	for (incrp = __rsshell_increments; incr = *incrp++;)
106251188Sbostic 		for (t1 = incr; t1 < nmemb; ++t1)
106351188Sbostic 			for (t2 = t1 - incr; t2 >= 0;)
106451188Sbostic 				if (lb_array[t2] > lb_array[t2 + incr]) {
106551188Sbostic 					lb_temp = lb_array[t2];
106651188Sbostic 					lb_array[t2] = lb_array[t2 + incr];
106751188Sbostic 					lb_array[t2 + incr] = lb_temp;
106851188Sbostic 					bp_temp = bp_array[t2];
106951188Sbostic 					bp_array[t2] = bp_array[t2 + incr];
107051188Sbostic 					bp_array[t2 + incr] = bp_temp;
107151188Sbostic 					t2 -= incr;
107251188Sbostic 				} else
107351188Sbostic 					break;
107451188Sbostic }
107555940Sbostic 
1076