xref: /csrg-svn/sys/ufs/lfs/lfs_segment.c (revision 69421)
151188Sbostic /*
263375Sbostic  * Copyright (c) 1991, 1993
363375Sbostic  *	The Regents of the University of California.  All rights reserved.
451188Sbostic  *
551188Sbostic  * %sccs.include.redist.c%
651188Sbostic  *
7*69421Smckusick  *	@(#)lfs_segment.c	8.9 (Berkeley) 05/14/95
851188Sbostic  */
951188Sbostic 
1051490Sbostic #include <sys/param.h>
1151490Sbostic #include <sys/systm.h>
1251490Sbostic #include <sys/namei.h>
1352085Sbostic #include <sys/kernel.h>
1451490Sbostic #include <sys/resourcevar.h>
1551490Sbostic #include <sys/file.h>
1651490Sbostic #include <sys/stat.h>
1751490Sbostic #include <sys/buf.h>
1851490Sbostic #include <sys/proc.h>
1951490Sbostic #include <sys/conf.h>
2051490Sbostic #include <sys/vnode.h>
2151490Sbostic #include <sys/malloc.h>
2251490Sbostic #include <sys/mount.h>
2351188Sbostic 
2455033Smckusick #include <miscfs/specfs/specdev.h>
2555033Smckusick #include <miscfs/fifofs/fifo.h>
2655033Smckusick 
2751499Sbostic #include <ufs/ufs/quota.h>
2851499Sbostic #include <ufs/ufs/inode.h>
2951499Sbostic #include <ufs/ufs/dir.h>
3051499Sbostic #include <ufs/ufs/ufsmount.h>
3156478Smargo #include <ufs/ufs/ufs_extern.h>
3251490Sbostic 
3351499Sbostic #include <ufs/lfs/lfs.h>
3451499Sbostic #include <ufs/lfs/lfs_extern.h>
3551490Sbostic 
3657072Smargo extern int count_lock_queue __P((void));
3757072Smargo 
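/*
 * A checkpoint is forced once more than MAX_ACTIVE segments have been
 * marked active since the last checkpoint (see lfs_vflush and
 * lfs_segwrite).
 */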
3855940Sbostic #define MAX_ACTIVE	10
3951188Sbostic /*
4051860Sbostic  * Determine if it's OK to start a partial segment here (more than one
4151860Sbostic  * file-system block of space must remain), or if we need a new segment.
4251301Sbostic  */
4351860Sbostic #define	LFS_PARTIAL_FITS(fs) \
4451860Sbostic 	((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
4551860Sbostic 	1 << (fs)->lfs_fsbtodb)
4651188Sbostic 
4753347Sbostic void	 lfs_callback __P((struct buf *));
4852085Sbostic void	 lfs_gather __P((struct lfs *, struct segment *,
4952085Sbostic 	     struct vnode *, int (*) __P((struct lfs *, struct buf *))));
5055940Sbostic int	 lfs_gatherblock __P((struct segment *, struct buf *, int *));
5168550Smckusick void	 lfs_iset __P((struct inode *, ufs_daddr_t, time_t));
5252085Sbostic int	 lfs_match_data __P((struct lfs *, struct buf *));
5352085Sbostic int	 lfs_match_dindir __P((struct lfs *, struct buf *));
5452085Sbostic int	 lfs_match_indir __P((struct lfs *, struct buf *));
5552085Sbostic int	 lfs_match_tindir __P((struct lfs *, struct buf *));
5652077Sbostic void	 lfs_newseg __P((struct lfs *));
5768550Smckusick void	 lfs_shellsort __P((struct buf **, ufs_daddr_t *, register int));
5855940Sbostic void	 lfs_supercallback __P((struct buf *));
5956027Sbostic void	 lfs_updatemeta __P((struct segment *));
6057072Smargo int	 lfs_vref __P((struct vnode *));
6157072Smargo void	 lfs_vunref __P((struct vnode *));
6252085Sbostic void	 lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
6354264Sbostic int	 lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
6454264Sbostic int	 lfs_writeseg __P((struct lfs *, struct segment *));
6557072Smargo void	 lfs_writesuper __P((struct lfs *));
6654264Sbostic void	 lfs_writevnodes __P((struct lfs *fs, struct mount *mp,
6754264Sbostic 	    struct segment *sp, int dirops));
6851188Sbostic 
6951860Sbostic int	lfs_allclean_wakeup;		/* Cleaner wakeup address. */
7051860Sbostic 
7157072Smargo /* Statistics Counters */
7257072Smargo #define DOSTATS
7357072Smargo struct lfs_stats lfs_stats;
7457072Smargo 
7557072Smargo /* op values to lfs_writevnodes */
7657072Smargo #define	VN_REG	0
7757072Smargo #define	VN_DIROP	1
7857072Smargo #define	VN_EMPTY	2
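/*
 * VN_REG selects all regular vnodes, VN_DIROP is intended for vnodes
 * involved in directory operations (that filtering is currently commented
 * out below), and VN_EMPTY selects only vnodes with no dirty data blocks
 * so that just their inodes get written.
 */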
7957072Smargo 
8052328Sbostic /*
8152328Sbostic  * Ifile and meta data blocks are not marked busy, so segment writes MUST be
8252328Sbostic  * single threaded.  Currently, there are two paths into lfs_segwrite, sync()
8352328Sbostic  * and getnewbuf().  They both mark the file system busy.  Lfs_vflush()
8452328Sbostic  * explicitly marks the file system busy.  So lfs_segwrite is safe.  I think.
8552328Sbostic  */
8652328Sbostic 
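/*
 * Flush the dirty blocks and inode of a single vnode by writing a partial
 * segment for it; if too many segments are already active, fall back to
 * writing a full checkpoint of the file system instead.
 */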
8751188Sbostic int
8852328Sbostic lfs_vflush(vp)
8952328Sbostic 	struct vnode *vp;
9052328Sbostic {
9152328Sbostic 	struct inode *ip;
9252328Sbostic 	struct lfs *fs;
9352328Sbostic 	struct segment *sp;
9452328Sbostic 
9554690Sbostic 	fs = VFSTOUFS(vp->v_mount)->um_lfs;
9656159Smargo 	if (fs->lfs_nactive > MAX_ACTIVE)
9757072Smargo 		return(lfs_segwrite(vp->v_mount, SEGM_SYNC|SEGM_CKP));
9857072Smargo 	lfs_seglock(fs, SEGM_SYNC);
9957072Smargo 	sp = fs->lfs_sp;
10056159Smargo 
10152328Sbostic 
10257072Smargo 	ip = VTOI(vp);
10365242Smckusick 	if (vp->v_dirtyblkhd.lh_first == NULL)
10457072Smargo 		lfs_writevnodes(fs, vp->v_mount, sp, VN_EMPTY);
10552328Sbostic 
10655551Sbostic 	do {
10755551Sbostic 		do {
10865242Smckusick 			if (vp->v_dirtyblkhd.lh_first != NULL)
10955551Sbostic 				lfs_writefile(fs, sp, vp);
11055551Sbostic 		} while (lfs_writeinode(fs, sp, ip));
11152328Sbostic 
11255551Sbostic 	} while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM);
11352328Sbostic 
11457072Smargo #ifdef DOSTATS
11557072Smargo 	++lfs_stats.nwrites;
11657072Smargo 	if (sp->seg_flags & SEGM_SYNC)
11757072Smargo 		++lfs_stats.nsync_writes;
11857072Smargo 	if (sp->seg_flags & SEGM_CKP)
11957072Smargo 		++lfs_stats.ncheckpoints;
12057072Smargo #endif
12154690Sbostic 	lfs_segunlock(fs);
12252328Sbostic 	return (0);
12352328Sbostic }
12452328Sbostic 
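/*
 * Walk the list of vnodes associated with this mount point, writing the
 * dirty data and/or inode of each vnode selected by "op" and skipping
 * the IFILE, which is handled separately by the callers.
 */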
12554264Sbostic void
12657072Smargo lfs_writevnodes(fs, mp, sp, op)
12754264Sbostic 	struct lfs *fs;
12854264Sbostic 	struct mount *mp;
12954264Sbostic 	struct segment *sp;
13057072Smargo 	int op;
13154264Sbostic {
13254264Sbostic 	struct inode *ip;
13354264Sbostic 	struct vnode *vp;
13454264Sbostic 
13569290Smckusick /* BEGIN HACK */
13669290Smckusick #define	VN_OFFSET	(((void *)&vp->v_mntvnodes.le_next) - (void *)vp)
13769290Smckusick #define	BACK_VP(VP)	((struct vnode *)(((void *)VP->v_mntvnodes.le_prev) - VN_OFFSET))
13869290Smckusick #define	BEG_OF_VLIST	((struct vnode *)(((void *)&mp->mnt_vnodelist.lh_first) - VN_OFFSET))
13969290Smckusick 
14069290Smckusick /* Find last vnode. */
14169290Smckusick loop:   for (vp = mp->mnt_vnodelist.lh_first;
14269290Smckusick 	     vp && vp->v_mntvnodes.le_next != NULL;
14369290Smckusick 	     vp = vp->v_mntvnodes.le_next);
14469290Smckusick 	for (; vp && vp != BEG_OF_VLIST; vp = BACK_VP(vp)) {
14569290Smckusick /* END HACK */
14669290Smckusick /*
14765242Smckusick loop:
14865242Smckusick 	for (vp = mp->mnt_vnodelist.lh_first;
14965242Smckusick 	     vp != NULL;
15065242Smckusick 	     vp = vp->v_mntvnodes.le_next) {
15169290Smckusick */
15254264Sbostic 		/*
15354264Sbostic 		 * If the vnode that we are about to sync is no longer
15454264Sbostic 		 * associated with this mount point, start over.
15554264Sbostic 		 */
15654264Sbostic 		if (vp->v_mount != mp)
15754264Sbostic 			goto loop;
15854264Sbostic 
15957072Smargo 		/* XXX ignore dirops for now
16057072Smargo 		if (op == VN_DIROP && !(vp->v_flag & VDIROP) ||
16157072Smargo 		    op != VN_DIROP && (vp->v_flag & VDIROP))
16254264Sbostic 			continue;
16357072Smargo 		*/
16454264Sbostic 
16565242Smckusick 		if (op == VN_EMPTY && vp->v_dirtyblkhd.lh_first)
16657072Smargo 			continue;
16757072Smargo 
16857072Smargo 		if (vp->v_type == VNON)
16957072Smargo 			continue;
17057072Smargo 
17157072Smargo 		if (lfs_vref(vp))
17257072Smargo 			continue;
17357072Smargo 
17454264Sbostic 		/*
17554264Sbostic 		 * Write the inode/file if dirty and it's not the
17654264Sbostic 		 * IFILE.
17754264Sbostic 		 */
17854264Sbostic 		ip = VTOI(vp);
17964611Sbostic 		if ((ip->i_flag &
18064611Sbostic 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE) ||
18165242Smckusick 		    vp->v_dirtyblkhd.lh_first != NULL) &&
18254264Sbostic 		    ip->i_number != LFS_IFILE_INUM) {
18365242Smckusick 			if (vp->v_dirtyblkhd.lh_first != NULL)
18454264Sbostic 				lfs_writefile(fs, sp, vp);
18554264Sbostic 			(void) lfs_writeinode(fs, sp, ip);
18654264Sbostic 		}
18754264Sbostic 		vp->v_flag &= ~VDIROP;
18857072Smargo 		lfs_vunref(vp);
18954264Sbostic 	}
19054264Sbostic }
19154264Sbostic 
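/*
 * Write the dirty vnodes of a file system out as one or more partial
 * segments.  On a checkpoint, additionally clear the ACTIVE flag on all
 * segment usage entries and rewrite the IFILE, repeating until the IFILE
 * itself is stable on disk.
 */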
19252328Sbostic int
19357072Smargo lfs_segwrite(mp, flags)
19452085Sbostic 	struct mount *mp;
19557072Smargo 	int flags;			/* Segment write flags (SEGM_*). */
19651188Sbostic {
197*69421Smckusick 	struct proc *p = curproc;	/* XXX */
19855592Sbostic 	struct buf *bp;
19952085Sbostic 	struct inode *ip;
20051499Sbostic 	struct lfs *fs;
20152085Sbostic 	struct segment *sp;
20252085Sbostic 	struct vnode *vp;
20355592Sbostic 	SEGUSE *segusep;
20468550Smckusick 	ufs_daddr_t ibno;
20555940Sbostic 	CLEANERINFO *cip;
20665473Sbostic 	int clean, do_ckp, error, i;
20751188Sbostic 
20852328Sbostic 	fs = VFSTOUFS(mp)->um_lfs;
20955940Sbostic 
21055940Sbostic  	/*
21155940Sbostic  	 * If we have 2 or fewer clean segments, wait until the cleaner
21255940Sbostic 	 * writes some.
21355940Sbostic  	 */
21455940Sbostic 	do {
21555940Sbostic 		LFS_CLEANERINFO(cip, fs, bp);
21655940Sbostic 		clean = cip->clean;
21755940Sbostic 		brelse(bp);
21855940Sbostic 		if (clean <= 2) {
21969290Smckusick 			/* printf ("segs clean: %d\n", clean); */
22055940Sbostic 			wakeup(&lfs_allclean_wakeup);
22155940Sbostic 			if (error = tsleep(&fs->lfs_avail, PRIBIO + 1,
22255940Sbostic 			    "lfs writer", 0))
22355940Sbostic 				return (error);
22455940Sbostic 		}
22555940Sbostic 	} while (clean <= 2);
22652085Sbostic 
22751860Sbostic 	/*
22852328Sbostic 	 * Allocate a segment structure and enough space to hold pointers to
22952328Sbostic 	 * the maximum possible number of buffers which can be described in a
23052328Sbostic 	 * single summary block.
23152328Sbostic 	 */
23257072Smargo 	do_ckp = flags & SEGM_CKP || fs->lfs_nactive > MAX_ACTIVE;
23357072Smargo 	lfs_seglock(fs, flags | (do_ckp ? SEGM_CKP : 0));
23457072Smargo 	sp = fs->lfs_sp;
23552328Sbostic 
23657072Smargo 	lfs_writevnodes(fs, mp, sp, VN_REG);
23751342Sbostic 
23857072Smargo 	/* XXX ignore ordering of dirops for now */
23957072Smargo 	/* XXX
24054264Sbostic 	fs->lfs_writer = 1;
24154264Sbostic 	if (fs->lfs_dirops && (error =
24254264Sbostic 	    tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) {
24354264Sbostic 		free(sp->bpp, M_SEGMENT);
24454264Sbostic 		free(sp, M_SEGMENT);
24554264Sbostic 		fs->lfs_writer = 0;
24655551Sbostic 		return (error);
24754264Sbostic 	}
24851860Sbostic 
24957072Smargo 	lfs_writevnodes(fs, mp, sp, VN_DIROP);
25057072Smargo 	*/
25151860Sbostic 
25254264Sbostic 	/*
25355592Sbostic 	 * If we are doing a checkpoint, mark everything since the
25455592Sbostic 	 * last checkpoint as no longer ACTIVE.
25554264Sbostic 	 */
25655592Sbostic 	if (do_ckp)
25755592Sbostic 		for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz;
25855592Sbostic 		     --ibno >= fs->lfs_cleansz; ) {
25955592Sbostic 			if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize,
26055592Sbostic 			    NOCRED, &bp))
26255592Sbostic 				panic("lfs: ifile read");
26364526Sbostic 			segusep = (SEGUSE *)bp->b_data;
26455592Sbostic 			for (i = fs->lfs_sepb; i--; segusep++)
26555592Sbostic 				segusep->su_flags &= ~SEGUSE_ACTIVE;
26655592Sbostic 
26755940Sbostic 			error = VOP_BWRITE(bp);
26855592Sbostic 		}
26955592Sbostic 
27054264Sbostic 	if (do_ckp || fs->lfs_doifile) {
27156086Sbostic redo:
27254264Sbostic 		vp = fs->lfs_ivnode;
273*69421Smckusick 		while (vget(vp, LK_EXCLUSIVE, p))
274*69421Smckusick 			continue;
27552328Sbostic 		ip = VTOI(vp);
27665242Smckusick 		if (vp->v_dirtyblkhd.lh_first != NULL)
27755592Sbostic 			lfs_writefile(fs, sp, vp);
27855592Sbostic 		(void)lfs_writeinode(fs, sp, ip);
27952077Sbostic 		vput(vp);
28057072Smargo 		if (lfs_writeseg(fs, sp) && do_ckp)
28156086Sbostic 			goto redo;
28254264Sbostic 	} else
28354264Sbostic 		(void) lfs_writeseg(fs, sp);
28451342Sbostic 
28551215Sbostic 	/*
28651860Sbostic 	 * If the I/O count is non-zero, sleep until it reaches zero.  At the
28751860Sbostic 	 * moment, the user's process hangs around so we can sleep.
28851215Sbostic 	 */
28957072Smargo 	/* XXX ignore dirops for now
29054264Sbostic 	fs->lfs_writer = 0;
29154264Sbostic 	fs->lfs_doifile = 0;
29254264Sbostic 	wakeup(&fs->lfs_dirops);
29357072Smargo 	*/
29454264Sbostic 
29557072Smargo #ifdef DOSTATS
29657072Smargo 	++lfs_stats.nwrites;
29757072Smargo 	if (sp->seg_flags & SEGM_SYNC)
29857072Smargo 		++lfs_stats.nsync_writes;
29957072Smargo 	if (sp->seg_flags & SEGM_CKP)
30057072Smargo 		++lfs_stats.ncheckpoints;
30157072Smargo #endif
30254690Sbostic 	lfs_segunlock(fs);
30351860Sbostic 	return (0);
30451188Sbostic }
30551188Sbostic 
30651860Sbostic /*
30751860Sbostic  * Write the dirty blocks associated with a vnode.
30851860Sbostic  */
30952077Sbostic void
31051860Sbostic lfs_writefile(fs, sp, vp)
31151499Sbostic 	struct lfs *fs;
31252085Sbostic 	struct segment *sp;
31352085Sbostic 	struct vnode *vp;
31451188Sbostic {
31551860Sbostic 	struct buf *bp;
31652085Sbostic 	struct finfo *fip;
31751860Sbostic 	IFILE *ifp;
31851188Sbostic 
31952085Sbostic 	if (sp->seg_bytes_left < fs->lfs_bsize ||
32057072Smargo 	    sp->sum_bytes_left < sizeof(struct finfo))
32154264Sbostic 		(void) lfs_writeseg(fs, sp);
32257072Smargo 
32368550Smckusick 	sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(ufs_daddr_t);
32456478Smargo 	++((SEGSUM *)(sp->segsum))->ss_nfinfo;
32551215Sbostic 
32652085Sbostic 	fip = sp->fip;
32752085Sbostic 	fip->fi_nblocks = 0;
32852085Sbostic 	fip->fi_ino = VTOI(vp)->i_number;
32952085Sbostic 	LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
33052085Sbostic 	fip->fi_version = ifp->if_version;
33152085Sbostic 	brelse(bp);
33251188Sbostic 
33352085Sbostic 	/*
33452085Sbostic 	 * It may not be necessary to write the meta-data blocks at this point,
33552085Sbostic 	 * as the roll-forward recovery code should be able to reconstruct the
33652085Sbostic 	 * list.
33752085Sbostic 	 */
33852085Sbostic 	lfs_gather(fs, sp, vp, lfs_match_data);
33952085Sbostic 	lfs_gather(fs, sp, vp, lfs_match_indir);
34052085Sbostic 	lfs_gather(fs, sp, vp, lfs_match_dindir);
34151860Sbostic #ifdef TRIPLE
34252085Sbostic 	lfs_gather(fs, sp, vp, lfs_match_tindir);
34351860Sbostic #endif
34451342Sbostic 
34552085Sbostic 	fip = sp->fip;
34652085Sbostic 	if (fip->fi_nblocks != 0) {
34752085Sbostic 		sp->fip =
34852085Sbostic 		    (struct finfo *)((caddr_t)fip + sizeof(struct finfo) +
34968550Smckusick 		    sizeof(ufs_daddr_t) * (fip->fi_nblocks - 1));
35055940Sbostic 		sp->start_lbp = &sp->fip->fi_blocks[0];
35156478Smargo 	} else {
35268550Smckusick 		sp->sum_bytes_left += sizeof(struct finfo) - sizeof(ufs_daddr_t);
35356478Smargo 		--((SEGSUM *)(sp->segsum))->ss_nfinfo;
35456478Smargo 	}
35551215Sbostic }
35651215Sbostic 
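/*
 * Copy the inode into the current inode block of the segment, starting a
 * new inode block (and, if need be, a new partial segment) first.  The
 * IFILE entry and the segment usage information for the inode's old
 * address are updated.  Returns nonzero if the IFILE must be written
 * again by the caller.
 */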
35754264Sbostic int
35851915Sbostic lfs_writeinode(fs, sp, ip)
35951915Sbostic 	struct lfs *fs;
36052085Sbostic 	struct segment *sp;
36152085Sbostic 	struct inode *ip;
36251915Sbostic {
36352085Sbostic 	struct buf *bp, *ibp;
36452077Sbostic 	IFILE *ifp;
36552682Sstaelin 	SEGUSE *sup;
36668550Smckusick 	ufs_daddr_t daddr;
36752077Sbostic 	ino_t ino;
36857072Smargo 	int error, i, ndx;
36954264Sbostic 	int redo_ifile = 0;
37051915Sbostic 
37164611Sbostic 	if (!(ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)))
37256190Smargo 		return(0);
37355940Sbostic 
37451915Sbostic 	/* Allocate a new inode block if necessary. */
37551915Sbostic 	if (sp->ibp == NULL) {
37651915Sbostic 		/* Allocate a new segment if necessary. */
37751915Sbostic 		if (sp->seg_bytes_left < fs->lfs_bsize ||
37868550Smckusick 		    sp->sum_bytes_left < sizeof(ufs_daddr_t))
37954264Sbostic 			(void) lfs_writeseg(fs, sp);
38051915Sbostic 
38151915Sbostic 		/* Get next inode block. */
38252682Sstaelin 		daddr = fs->lfs_offset;
38351915Sbostic 		fs->lfs_offset += fsbtodb(fs, 1);
38451915Sbostic 		sp->ibp = *sp->cbpp++ =
38556056Sbostic 		    lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, daddr,
38656056Sbostic 		    fs->lfs_bsize);
38757072Smargo 		/* Zero out inode numbers */
38857072Smargo 		for (i = 0; i < INOPB(fs); ++i)
38964526Sbostic 			((struct dinode *)sp->ibp->b_data)[i].di_inumber = 0;
39055940Sbostic 		++sp->start_bpp;
39155940Sbostic 		fs->lfs_avail -= fsbtodb(fs, 1);
39252688Sbostic 		/* Set remaining space counters. */
39351915Sbostic 		sp->seg_bytes_left -= fs->lfs_bsize;
39468550Smckusick 		sp->sum_bytes_left -= sizeof(ufs_daddr_t);
39568550Smckusick 		ndx = LFS_SUMMARY_SIZE / sizeof(ufs_daddr_t) -
39651915Sbostic 		    sp->ninodes / INOPB(fs) - 1;
39768550Smckusick 		((ufs_daddr_t *)(sp->segsum))[ndx] = daddr;
39851915Sbostic 	}
39951915Sbostic 
40052085Sbostic 	/* Update the inode times and copy the inode onto the inode page. */
40164611Sbostic 	if (ip->i_flag & IN_MODIFIED)
40256056Sbostic 		--fs->lfs_uinodes;
40352077Sbostic 	ITIMES(ip, &time, &time);
40464611Sbostic 	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
40551915Sbostic 	bp = sp->ibp;
40664526Sbostic 	((struct dinode *)bp->b_data)[sp->ninodes % INOPB(fs)] = ip->i_din;
40751915Sbostic 	/* Increment inode count in segment summary block. */
40851915Sbostic 	++((SEGSUM *)(sp->segsum))->ss_ninos;
40951915Sbostic 
41051915Sbostic 	/* If this page is full, set flag to allocate a new page. */
41151915Sbostic 	if (++sp->ninodes % INOPB(fs) == 0)
41251915Sbostic 		sp->ibp = NULL;
41351915Sbostic 
41451915Sbostic 	/*
41552077Sbostic 	 * If updating the ifile, update the super-block.  Update the disk
41652077Sbostic 	 * address and access times for this inode in the ifile.
41751915Sbostic 	 */
41852077Sbostic 	ino = ip->i_number;
41955696Sbostic 	if (ino == LFS_IFILE_INUM) {
42055696Sbostic 		daddr = fs->lfs_idaddr;
42151915Sbostic 		fs->lfs_idaddr = bp->b_blkno;
42255696Sbostic 	} else {
42355696Sbostic 		LFS_IENTRY(ifp, fs, ino, ibp);
42455696Sbostic 		daddr = ifp->if_daddr;
42555696Sbostic 		ifp->if_daddr = bp->b_blkno;
42655940Sbostic 		error = VOP_BWRITE(ibp);
42755696Sbostic 	}
42852077Sbostic 
42954264Sbostic 	/*
43054264Sbostic 	 * No need to update segment usage if there was no former inode address
43154264Sbostic 	 * or if the last inode address is in the current partial segment.
43254264Sbostic 	 */
43354264Sbostic 	if (daddr != LFS_UNUSED_DADDR &&
43455803Sbostic 	    !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) {
43552682Sstaelin 		LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
43652682Sstaelin #ifdef DIAGNOSTIC
43754264Sbostic 		if (sup->su_nbytes < sizeof(struct dinode)) {
43852819Sbostic 			/* XXX -- Change to a panic. */
43952819Sbostic 			printf("lfs: negative bytes (segment %d)\n",
44052682Sstaelin 			    datosn(fs, daddr));
44154264Sbostic 			panic("negative bytes");
44254264Sbostic 		}
44352682Sstaelin #endif
44452682Sstaelin 		sup->su_nbytes -= sizeof(struct dinode);
44556069Sbostic 		redo_ifile =
44656069Sbostic 		    (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
44755940Sbostic 		error = VOP_BWRITE(bp);
44852682Sstaelin 	}
44955551Sbostic 	return (redo_ifile);
45051915Sbostic }
45151915Sbostic 
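/*
 * Add a single dirty buffer to the current partial segment and record it
 * in the FINFO block.  If the summary block or the segment is full, the
 * partial segment is written out first and 1 is returned so that the
 * caller rescans the vnode's dirty buffer list.
 */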
45255940Sbostic int
45355940Sbostic lfs_gatherblock(sp, bp, sptr)
45455940Sbostic 	struct segment *sp;
45555940Sbostic 	struct buf *bp;
45655940Sbostic 	int *sptr;
45755940Sbostic {
45855940Sbostic 	struct lfs *fs;
45955940Sbostic 	int version;
46055940Sbostic 
46155940Sbostic 	/*
46255940Sbostic 	 * If full, finish this segment.  We may be doing I/O, so
46355940Sbostic 	 * release and reacquire the splbio().
46455940Sbostic 	 */
46556027Sbostic #ifdef DIAGNOSTIC
46656027Sbostic 	if (sp->vp == NULL)
46756027Sbostic 		panic ("lfs_gatherblock: Null vp in segment");
46856027Sbostic #endif
46955940Sbostic 	fs = sp->fs;
47068550Smckusick 	if (sp->sum_bytes_left < sizeof(ufs_daddr_t) ||
47169290Smckusick 	    sp->seg_bytes_left < bp->b_bcount) {
47255940Sbostic 		if (sptr)
47355940Sbostic 			splx(*sptr);
47456027Sbostic 		lfs_updatemeta(sp);
47555940Sbostic 
47655940Sbostic 		version = sp->fip->fi_version;
47755940Sbostic 		(void) lfs_writeseg(fs, sp);
47855940Sbostic 
47955940Sbostic 		sp->fip->fi_version = version;
48056027Sbostic 		sp->fip->fi_ino = VTOI(sp->vp)->i_number;
48156478Smargo 		/* Add the current file to the segment summary. */
48256478Smargo 		++((SEGSUM *)(sp->segsum))->ss_nfinfo;
48355940Sbostic 		sp->sum_bytes_left -=
48468550Smckusick 		    sizeof(struct finfo) - sizeof(ufs_daddr_t);
48555940Sbostic 
48655940Sbostic 		if (sptr)
48755940Sbostic 			*sptr = splbio();
48855940Sbostic 		return(1);
48955940Sbostic 	}
49055940Sbostic 
49155940Sbostic 	/* Insert into the buffer list, update the FINFO block. */
49255940Sbostic 	bp->b_flags |= B_GATHERED;
49355940Sbostic 	*sp->cbpp++ = bp;
49455940Sbostic 	sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno;
49555940Sbostic 
49668550Smckusick 	sp->sum_bytes_left -= sizeof(ufs_daddr_t);
49769290Smckusick 	sp->seg_bytes_left -= bp->b_bcount;
49855940Sbostic 	return(0);
49955940Sbostic }
50055940Sbostic 
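/*
 * Gather all of a vnode's dirty buffers that satisfy the match function
 * into the current partial segment, then update the vnode's metadata to
 * reflect the blocks' new disk addresses.
 */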
50152077Sbostic void
50251215Sbostic lfs_gather(fs, sp, vp, match)
50351499Sbostic 	struct lfs *fs;
50452085Sbostic 	struct segment *sp;
50552085Sbostic 	struct vnode *vp;
50652085Sbostic 	int (*match) __P((struct lfs *, struct buf *));
50751215Sbostic {
50855940Sbostic 	struct buf *bp;
50951342Sbostic 	int s;
51051215Sbostic 
51156027Sbostic 	sp->vp = vp;
51255940Sbostic 	s = splbio();
51369290Smckusick /* This is a hack to see if ordering the blocks in LFS makes a difference. */
51469290Smckusick /* BEGIN HACK */
51569290Smckusick #define	BUF_OFFSET	(((void *)&bp->b_vnbufs.le_next) - (void *)bp)
51669290Smckusick #define	BACK_BUF(BP)	((struct buf *)(((void *)BP->b_vnbufs.le_prev) - BUF_OFFSET))
51769290Smckusick #define	BEG_OF_LIST	((struct buf *)(((void *)&vp->v_dirtyblkhd.lh_first) - BUF_OFFSET))
51869290Smckusick 
51969290Smckusick 
52069290Smckusick /*loop:	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) {*/
52169290Smckusick /* Find last buffer. */
52269290Smckusick loop:   for (bp = vp->v_dirtyblkhd.lh_first; bp && bp->b_vnbufs.le_next != NULL;
52369290Smckusick 	    bp = bp->b_vnbufs.le_next);
52469290Smckusick 	for (; bp && bp != BEG_OF_LIST; bp = BACK_BUF(bp)) {
52569290Smckusick /* END HACK */
52654264Sbostic 		if (bp->b_flags & B_BUSY || !match(fs, bp) ||
52754264Sbostic 		    bp->b_flags & B_GATHERED)
52851215Sbostic 			continue;
52951342Sbostic #ifdef DIAGNOSTIC
53051860Sbostic 		if (!(bp->b_flags & B_DELWRI))
53151915Sbostic 			panic("lfs_gather: bp not B_DELWRI");
53251860Sbostic 		if (!(bp->b_flags & B_LOCKED))
53351915Sbostic 			panic("lfs_gather: bp not B_LOCKED");
53451342Sbostic #endif
53555940Sbostic 		if (lfs_gatherblock(sp, bp, &s))
53653145Sstaelin 			goto loop;
53751188Sbostic 	}
53851215Sbostic 	splx(s);
53956027Sbostic 	lfs_updatemeta(sp);
54056027Sbostic 	sp->vp = NULL;
54151188Sbostic }
54251188Sbostic 
54355940Sbostic 
54451342Sbostic /*
54551342Sbostic  * Update the metadata that points to the blocks listed in the FINFO
54651188Sbostic  * array.
54751188Sbostic  */
54852077Sbostic void
54956027Sbostic lfs_updatemeta(sp)
55052085Sbostic 	struct segment *sp;
55151188Sbostic {
55251915Sbostic 	SEGUSE *sup;
55352085Sbostic 	struct buf *bp;
55455940Sbostic 	struct lfs *fs;
55556027Sbostic 	struct vnode *vp;
55656478Smargo 	struct indir a[NIADDR + 2], *ap;
55752085Sbostic 	struct inode *ip;
55868550Smckusick 	ufs_daddr_t daddr, lbn, off;
55969290Smckusick 	int error, i, nblocks, num;
56051188Sbostic 
56156027Sbostic 	vp = sp->vp;
56255940Sbostic 	nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp;
56369290Smckusick 	if (nblocks < 0)
56469290Smckusick 		panic("This is a bad thing\n");
56556027Sbostic 	if (vp == NULL || nblocks == 0)
56651215Sbostic 		return;
56751215Sbostic 
56851915Sbostic 	/* Sort the blocks. */
56955940Sbostic 	if (!(sp->seg_flags & SEGM_CLEAN))
57055940Sbostic 		lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks);
57151215Sbostic 
57251915Sbostic 	/*
57369290Smckusick 	 * Record the length of the last block in case it's a fragment.
57469290Smckusick 	 * If there are indirect blocks present, they sort last.  An
57569290Smckusick 	 * indirect block will be lfs_bsize and its presence indicates
57669290Smckusick 	 * that you cannot have fragments.
57769290Smckusick 	 */
57869290Smckusick 	sp->fip->fi_lastlength = sp->start_bpp[nblocks - 1]->b_bcount;
57969290Smckusick 
58069290Smckusick 	/*
58151915Sbostic 	 * Assign disk addresses, and update references to the logical
58251915Sbostic 	 * block and the segment usage information.
58351915Sbostic 	 */
58455940Sbostic 	fs = sp->fs;
58555940Sbostic 	for (i = nblocks; i--; ++sp->start_bpp) {
58655940Sbostic 		lbn = *sp->start_lbp++;
58755940Sbostic 		(*sp->start_bpp)->b_blkno = off = fs->lfs_offset;
58869290Smckusick 		fs->lfs_offset +=
58969290Smckusick 		    fragstodb(fs, numfrags(fs, (*sp->start_bpp)->b_bcount));
59051215Sbostic 
59156478Smargo 		if (error = ufs_bmaparray(vp, lbn, &daddr, a, &num, NULL))
59256478Smargo 			panic("lfs_updatemeta: ufs_bmaparray %d", error);
59351860Sbostic 		ip = VTOI(vp);
59451860Sbostic 		switch (num) {
59551860Sbostic 		case 0:
59651915Sbostic 			ip->i_db[lbn] = off;
59751860Sbostic 			break;
59851860Sbostic 		case 1:
59951915Sbostic 			ip->i_ib[a[0].in_off] = off;
60051860Sbostic 			break;
60151860Sbostic 		default:
60251860Sbostic 			ap = &a[num - 1];
60351860Sbostic 			if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
60451860Sbostic 				panic("lfs_updatemeta: bread bno %d",
60551860Sbostic 				    ap->in_lbn);
60655458Sbostic 			/*
60755458Sbostic 			 * Bread may create a new indirect block which needs
60855458Sbostic 			 * to get counted for the inode.
60955458Sbostic 			 */
61055592Sbostic 			if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) {
61169290Smckusick 				ip->i_blocks += fsbtodb(fs, 1);
61269290Smckusick 				fs->lfs_bfree -= fragstodb(fs, fs->lfs_frag);
61355592Sbostic 			}
61468550Smckusick 			((ufs_daddr_t *)bp->b_data)[ap->in_off] = off;
61553530Sheideman 			VOP_BWRITE(bp);
61651188Sbostic 		}
61751915Sbostic 
61851915Sbostic 		/* Update segment usage information. */
61957072Smargo 		if (daddr != UNASSIGNED &&
62057072Smargo 		    !(daddr >= fs->lfs_lastpseg && daddr <= off)) {
62151915Sbostic 			LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
62251915Sbostic #ifdef DIAGNOSTIC
62369290Smckusick 			if (sup->su_nbytes < (*sp->start_bpp)->b_bcount) {
62452819Sbostic 				/* XXX -- Change to a panic. */
62552819Sbostic 				printf("lfs: negative bytes (segment %d)\n",
62651915Sbostic 				    datosn(fs, daddr));
62754264Sbostic 				panic ("Negative Bytes");
62854264Sbostic 			}
62951915Sbostic #endif
63069290Smckusick 			sup->su_nbytes -= (*sp->start_bpp)->b_bcount;
63155940Sbostic 			error = VOP_BWRITE(bp);
63251915Sbostic 		}
63351188Sbostic 	}
63451188Sbostic }
63551188Sbostic 
63651915Sbostic /*
63751915Sbostic  * Start a new partial segment (advancing to a new segment if necessary).
63851915Sbostic  */
63957072Smargo int
64057072Smargo lfs_initseg(fs)
64151499Sbostic 	struct lfs *fs;
64257072Smargo {
64352085Sbostic 	struct segment *sp;
64451915Sbostic 	SEGUSE *sup;
64551915Sbostic 	SEGSUM *ssp;
64651915Sbostic 	struct buf *bp;
64757072Smargo 	int repeat;
64851215Sbostic 
64957072Smargo 	sp = fs->lfs_sp;
65057072Smargo 
65157072Smargo 	repeat = 0;
65251915Sbostic 	/* Advance to the next segment. */
65351927Sbostic 	if (!LFS_PARTIAL_FITS(fs)) {
65452682Sstaelin 		/* Wake up any cleaning procs waiting on this file system. */
65552688Sbostic 		wakeup(&lfs_allclean_wakeup);
65652682Sstaelin 
65751927Sbostic 		lfs_newseg(fs);
65857072Smargo 		repeat = 1;
65951927Sbostic 		fs->lfs_offset = fs->lfs_curseg;
66051915Sbostic 		sp->seg_number = datosn(fs, fs->lfs_curseg);
66151915Sbostic 		sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;
66251915Sbostic 
66351915Sbostic 		/*
66451927Sbostic 		 * If the segment contains a superblock, update the offset
66551927Sbostic 		 * and summary address to skip over it.
66651915Sbostic 		 */
66752077Sbostic 		LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
66851927Sbostic 		if (sup->su_flags & SEGUSE_SUPERBLOCK) {
66951915Sbostic 			fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
67051915Sbostic 			sp->seg_bytes_left -= LFS_SBPAD;
67151215Sbostic 		}
67252085Sbostic 		brelse(bp);
67351915Sbostic 	} else {
67451915Sbostic 		sp->seg_number = datosn(fs, fs->lfs_curseg);
67551915Sbostic 		sp->seg_bytes_left = (fs->lfs_dbpseg -
67651915Sbostic 		    (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
67751915Sbostic 	}
67854264Sbostic 	fs->lfs_lastpseg = fs->lfs_offset;
67951342Sbostic 
68055940Sbostic 	sp->fs = fs;
68151915Sbostic 	sp->ibp = NULL;
68251915Sbostic 	sp->ninodes = 0;
68351342Sbostic 
68451915Sbostic 	/* Get a new buffer for SEGSUM and enter it into the buffer list. */
68551915Sbostic 	sp->cbpp = sp->bpp;
68656056Sbostic 	*sp->cbpp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_offset,
68756056Sbostic 	     LFS_SUMMARY_SIZE);
68864526Sbostic 	sp->segsum = (*sp->cbpp)->b_data;
68957072Smargo 	bzero(sp->segsum, LFS_SUMMARY_SIZE);
69055940Sbostic 	sp->start_bpp = ++sp->cbpp;
69151915Sbostic 	fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;
69251342Sbostic 
69351915Sbostic 	/* Set pointer to SEGSUM, initialize it. */
69451915Sbostic 	ssp = sp->segsum;
69551915Sbostic 	ssp->ss_next = fs->lfs_nextseg;
69651915Sbostic 	ssp->ss_nfinfo = ssp->ss_ninos = 0;
69769290Smckusick 	ssp->ss_magic = SS_MAGIC;
69851342Sbostic 
69951915Sbostic 	/* Set pointer to first FINFO, initialize it. */
70068119Smckusick 	sp->fip = (struct finfo *)((caddr_t)sp->segsum + sizeof(SEGSUM));
70151915Sbostic 	sp->fip->fi_nblocks = 0;
70255940Sbostic 	sp->start_lbp = &sp->fip->fi_blocks[0];
70369290Smckusick 	sp->fip->fi_lastlength = 0;
70451342Sbostic 
70551915Sbostic 	sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
70651915Sbostic 	sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);
70757072Smargo 
70857072Smargo 	return(repeat);
70951915Sbostic }
71051342Sbostic 
71151915Sbostic /*
71251915Sbostic  * Advance to the next segment and pick a new clean segment to write next.
71351915Sbostic  */
71452077Sbostic void
71551915Sbostic lfs_newseg(fs)
71651915Sbostic 	struct lfs *fs;
71751915Sbostic {
71851927Sbostic 	CLEANERINFO *cip;
71951915Sbostic 	SEGUSE *sup;
72051915Sbostic 	struct buf *bp;
72165473Sbostic 	int curseg, isdirty, sn;
72251915Sbostic 
72355592Sbostic 	LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
72456159Smargo 	sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
72556056Sbostic 	sup->su_nbytes = 0;
72656056Sbostic 	sup->su_nsums = 0;
72756056Sbostic 	sup->su_ninos = 0;
72855940Sbostic 	(void) VOP_BWRITE(bp);
72951927Sbostic 
73051927Sbostic 	LFS_CLEANERINFO(cip, fs, bp);
73151927Sbostic 	--cip->clean;
73251927Sbostic 	++cip->dirty;
73355940Sbostic 	(void) VOP_BWRITE(bp);
73451927Sbostic 
73551927Sbostic 	fs->lfs_lastseg = fs->lfs_curseg;
73651927Sbostic 	fs->lfs_curseg = fs->lfs_nextseg;
73751927Sbostic 	for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
73851915Sbostic 		sn = (sn + 1) % fs->lfs_nseg;
73951927Sbostic 		if (sn == curseg)
74051915Sbostic 			panic("lfs_nextseg: no clean segments");
74151915Sbostic 		LFS_SEGENTRY(sup, fs, sn, bp);
74251915Sbostic 		isdirty = sup->su_flags & SEGUSE_DIRTY;
74352085Sbostic 		brelse(bp);
74451915Sbostic 		if (!isdirty)
74551915Sbostic 			break;
74651915Sbostic 	}
74755592Sbostic 
74855940Sbostic 	++fs->lfs_nactive;
74951927Sbostic 	fs->lfs_nextseg = sntoda(fs, sn);
75057072Smargo #ifdef DOSTATS
75157072Smargo 	++lfs_stats.segsused;
75257072Smargo #endif
75351188Sbostic }
75451188Sbostic 
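/*
 * Write out the current partial segment: update the segment usage
 * information, checksum the data and summary blocks, copy the buffers
 * into large contiguous chunks and issue the writes.  Returns nonzero if
 * the caller may need to write a further partial segment.
 */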
75554264Sbostic int
75651188Sbostic lfs_writeseg(fs, sp)
75751499Sbostic 	struct lfs *fs;
75852085Sbostic 	struct segment *sp;
75951188Sbostic {
76055940Sbostic 	extern int locked_queue_count;
76152688Sbostic 	struct buf **bpp, *bp, *cbp;
76251188Sbostic 	SEGUSE *sup;
76352085Sbostic 	SEGSUM *ssp;
76451860Sbostic 	dev_t i_dev;
76551860Sbostic 	u_long *datap, *dp;
76669290Smckusick 	int do_again, i, nblocks, s;
76754264Sbostic 	int (*strategy)__P((struct vop_strategy_args *));
76854690Sbostic 	struct vop_strategy_args vop_strategy_a;
76955592Sbostic 	u_short ninos;
77052688Sbostic 	char *p;
77169290Smckusick 	long *lp;
77251188Sbostic 
77355940Sbostic 	/*
77455940Sbostic 	 * If there are no buffers other than the segment summary to write
77555940Sbostic 	 * and it is not a checkpoint, don't do anything.  On a checkpoint,
77655940Sbostic 	 * even if there aren't any buffers, you need to write the superblock.
77755940Sbostic 	 */
77857072Smargo 	if ((nblocks = sp->cbpp - sp->bpp) == 1)
77955551Sbostic 		return (0);
78052085Sbostic 
78169290Smckusick 	/* Update the segment usage information. */
78269290Smckusick 	LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
78369290Smckusick 
78469290Smckusick 	/* Loop through all blocks, except the segment summary. */
78569290Smckusick 	for (bpp = sp->bpp; ++bpp < sp->cbpp; )
78669290Smckusick 		sup->su_nbytes += (*bpp)->b_bcount;
78769290Smckusick 
78856159Smargo 	ssp = (SEGSUM *)sp->segsum;
78956159Smargo 
79056159Smargo 	ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs);
79156159Smargo 	sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode);
79256159Smargo 	sup->su_nbytes += LFS_SUMMARY_SIZE;
79356159Smargo 	sup->su_lastmod = time.tv_sec;
79456159Smargo 	sup->su_ninos += ninos;
79556159Smargo 	++sup->su_nsums;
79656159Smargo 	do_again = !(bp->b_flags & B_GATHERED);
79756159Smargo 	(void)VOP_BWRITE(bp);
79851188Sbostic 	/*
79952085Sbostic 	 * Compute checksum across data and then across summary; the first
80052085Sbostic 	 * block (the summary block) is skipped.  Set the create time here
80152085Sbostic 	 * so that it's guaranteed to be later than the inode mod times.
80251860Sbostic 	 *
80351860Sbostic 	 * XXX
80451860Sbostic 	 * Fix this to do it inline, instead of malloc/copy.
80551188Sbostic 	 */
80651860Sbostic 	datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
80756159Smargo 	for (bpp = sp->bpp, i = nblocks - 1; i--;) {
80856159Smargo 		if ((*++bpp)->b_flags & B_INVAL) {
80956159Smargo 			if (copyin((*bpp)->b_saveaddr, dp++, sizeof(u_long)))
81056159Smargo 				panic("lfs_writeseg: copyin failed");
81156159Smargo 		} else
81264526Sbostic 			*dp++ = ((u_long *)(*bpp)->b_data)[0];
81356159Smargo 	}
81452103Sbostic 	ssp->ss_create = time.tv_sec;
81555803Sbostic 	ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long));
81652085Sbostic 	ssp->ss_sumsum =
81752085Sbostic 	    cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
81851927Sbostic 	free(datap, M_SEGMENT);
81956159Smargo #ifdef DIAGNOSTIC
82056159Smargo 	if (fs->lfs_bfree < fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE)
82156159Smargo 		panic("lfs_writeseg: No diskspace for summary");
82256159Smargo #endif
82355592Sbostic 	fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE);
82454264Sbostic 
82551860Sbostic 	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
82653574Sheideman 	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];
82751301Sbostic 
82852688Sbostic 	/*
82952688Sbostic 	 * When we simply write the blocks we lose a rotation for every block
83052688Sbostic 	 * written.  To avoid this problem, we allocate memory in chunks, copy
83157072Smargo 	 * the buffers into the chunk and write the chunk.  MAXPHYS is the
83257072Smargo 	 * largest size I/O devices can handle.
83352688Sbostic 	 * When the data is copied to the chunk, turn off the B_LOCKED bit
83452688Sbostic 	 * and brelse the buffer (which will move them to the LRU list).  Add
83552688Sbostic 	 * the B_CALL flag to the buffer header so we can count I/O's for the
83652688Sbostic 	 * checkpoints and so we can release the allocated memory.
83752688Sbostic 	 *
83852688Sbostic 	 * XXX
83952688Sbostic 	 * This should be removed if the new virtual memory system allows us to
84052688Sbostic 	 * easily make the buffers contiguous in kernel memory and if that's
84152688Sbostic 	 * fast enough.
84252688Sbostic 	 */
84352688Sbostic 	for (bpp = sp->bpp, i = nblocks; i;) {
84456056Sbostic 		cbp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp,
84569290Smckusick 		    (*bpp)->b_blkno, MAXPHYS);
84652688Sbostic 		cbp->b_dev = i_dev;
84755940Sbostic 		cbp->b_flags |= B_ASYNC | B_BUSY;
84869290Smckusick 		cbp->b_bcount = 0;
84952688Sbostic 
85052688Sbostic 		s = splbio();
85152688Sbostic 		++fs->lfs_iocount;
85269290Smckusick 		for (p = cbp->b_data; i && cbp->b_bcount < MAXPHYS; i--) {
85369290Smckusick 			bp = *bpp;
85469290Smckusick 			if (bp->b_bcount > (MAXPHYS - cbp->b_bcount))
85569290Smckusick 				break;
85669290Smckusick 			bpp++;
85769290Smckusick 
85855940Sbostic 			/*
85955940Sbostic 			 * Fake buffers from the cleaner are marked as B_INVAL.
86055940Sbostic 			 * We need to copy the data from user space rather than
86155940Sbostic 			 * from the buffer indicated.
86255940Sbostic 			 * XXX == what do I do on an error?
86355940Sbostic 			 */
86455940Sbostic 			if (bp->b_flags & B_INVAL) {
86555940Sbostic 				if (copyin(bp->b_saveaddr, p, bp->b_bcount))
86655940Sbostic 					panic("lfs_writeseg: copyin failed");
86755940Sbostic 			} else
86864526Sbostic 				bcopy(bp->b_data, p, bp->b_bcount);
86969290Smckusick 
87052688Sbostic 			p += bp->b_bcount;
87169290Smckusick 			cbp->b_bcount += bp->b_bcount;
87255940Sbostic 			if (bp->b_flags & B_LOCKED)
87355940Sbostic 				--locked_queue_count;
87455940Sbostic 			bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
87554264Sbostic 			     B_LOCKED | B_GATHERED);
87655940Sbostic 			if (bp->b_flags & B_CALL) {
87755940Sbostic 				/* if B_CALL, it was created with newbuf */
87855940Sbostic 				brelvp(bp);
87957072Smargo 				if (!(bp->b_flags & B_INVAL))
88064526Sbostic 					free(bp->b_data, M_SEGMENT);
88155940Sbostic 				free(bp, M_SEGMENT);
88255940Sbostic 			} else {
88352688Sbostic 				bremfree(bp);
88456478Smargo 				bp->b_flags |= B_DONE;
88552688Sbostic 				reassignbuf(bp, bp->b_vp);
88655940Sbostic 				brelse(bp);
88752688Sbostic 			}
88851860Sbostic 		}
88956069Sbostic 		++cbp->b_vp->v_numoutput;
89052688Sbostic 		splx(s);
89156056Sbostic 		/*
89256056Sbostic 		 * XXXX This is a gross and disgusting hack.  Since these
89356056Sbostic 		 * buffers are physically addressed, they hang off the
89456056Sbostic 		 * device vnode (devvp).  As a result, they have no way
89556056Sbostic 		 * of getting to the LFS superblock or lfs structure to
89656056Sbostic 		 * keep track of the number of I/O's pending.  So, I am
89756056Sbostic 		 * going to stuff the fs into the saveaddr field of
89856056Sbostic 		 * the buffer (yuk).
89956056Sbostic 		 */
90056056Sbostic 		cbp->b_saveaddr = (caddr_t)fs;
90153574Sheideman 		vop_strategy_a.a_desc = VDESC(vop_strategy);
90253574Sheideman 		vop_strategy_a.a_bp = cbp;
90353574Sheideman 		(strategy)(&vop_strategy_a);
90451860Sbostic 	}
90557072Smargo 	/*
90657072Smargo 	 * XXX
90757072Smargo 	 * Vinvalbuf can move locked buffers off the locked queue
90857072Smargo 	 * and we have no way of knowing about this.  So, after
90957072Smargo 	 * doing a big write, we recalculate how many buffers are
91057072Smargo 	 * really still left on the locked queue.
91157072Smargo 	 */
91257072Smargo 	locked_queue_count = count_lock_queue();
91357072Smargo 	wakeup(&locked_queue_count);
91457072Smargo #ifdef DOSTATS
91557072Smargo 	++lfs_stats.psegwrites;
91657072Smargo 	lfs_stats.blocktot += nblocks - 1;
91757072Smargo 	if (fs->lfs_sp->seg_flags & SEGM_SYNC)
91857072Smargo 		++lfs_stats.psyncwrites;
91957072Smargo 	if (fs->lfs_sp->seg_flags & SEGM_CLEAN) {
92057072Smargo 		++lfs_stats.pcleanwrites;
92157072Smargo 		lfs_stats.cleanblocks += nblocks - 1;
92257072Smargo 	}
92357072Smargo #endif
92457072Smargo 	return (lfs_initseg(fs) || do_again);
92551188Sbostic }
92651188Sbostic 
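/*
 * Write the superblock asynchronously to the first superblock location;
 * lfs_supercallback releases the buffer when the write completes.
 */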
92752077Sbostic void
92857072Smargo lfs_writesuper(fs)
92951499Sbostic 	struct lfs *fs;
93051301Sbostic {
93152085Sbostic 	struct buf *bp;
93251860Sbostic 	dev_t i_dev;
93353574Sheideman 	int (*strategy) __P((struct vop_strategy_args *));
93456069Sbostic 	int s;
93554690Sbostic 	struct vop_strategy_args vop_strategy_a;
93651301Sbostic 
93751860Sbostic 	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
93853574Sheideman 	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];
93951356Sbostic 
94051342Sbostic 	/* Checksum the superblock and copy it into a buffer. */
94151499Sbostic 	fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
94256056Sbostic 	bp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_sboffs[0],
94356056Sbostic 	    LFS_SBPAD);
94464526Sbostic 	*(struct lfs *)bp->b_data = *fs;
94551215Sbostic 
94657072Smargo 	/* XXX Toggle between first two superblocks; for now just write first */
94751860Sbostic 	bp->b_dev = i_dev;
94857072Smargo 	bp->b_flags |= B_BUSY | B_CALL | B_ASYNC;
94957072Smargo 	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
95057072Smargo 	bp->b_iodone = lfs_supercallback;
95153574Sheideman 	vop_strategy_a.a_desc = VDESC(vop_strategy);
95253574Sheideman 	vop_strategy_a.a_bp = bp;
95356069Sbostic 	s = splbio();
95457072Smargo 	++bp->b_vp->v_numoutput;
95556069Sbostic 	splx(s);
95653574Sheideman 	(strategy)(&vop_strategy_a);
95751215Sbostic }
95851215Sbostic 
95951342Sbostic /*
96051342Sbostic  * Logical block number match routines used when traversing the dirty block
96151342Sbostic  * chain.
96251342Sbostic  */
96352077Sbostic int
96452077Sbostic lfs_match_data(fs, bp)
96551860Sbostic 	struct lfs *fs;
96652085Sbostic 	struct buf *bp;
96751215Sbostic {
96851342Sbostic 	return (bp->b_lblkno >= 0);
96951215Sbostic }
97051215Sbostic 
97152077Sbostic int
97252077Sbostic lfs_match_indir(fs, bp)
97351860Sbostic 	struct lfs *fs;
97452085Sbostic 	struct buf *bp;
97551215Sbostic {
97651860Sbostic 	int lbn;
97751860Sbostic 
97851860Sbostic 	lbn = bp->b_lblkno;
97951860Sbostic 	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
98051215Sbostic }
98151215Sbostic 
98252077Sbostic int
98352077Sbostic lfs_match_dindir(fs, bp)
98451860Sbostic 	struct lfs *fs;
98552085Sbostic 	struct buf *bp;
98651215Sbostic {
98751860Sbostic 	int lbn;
98851860Sbostic 
98951860Sbostic 	lbn = bp->b_lblkno;
99051860Sbostic 	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
99151215Sbostic }
99251215Sbostic 
99352077Sbostic int
99452077Sbostic lfs_match_tindir(fs, bp)
99551499Sbostic 	struct lfs *fs;
99652085Sbostic 	struct buf *bp;
99751342Sbostic {
99851860Sbostic 	int lbn;
99951342Sbostic 
100051860Sbostic 	lbn = bp->b_lblkno;
100151860Sbostic 	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
100251860Sbostic }
100351342Sbostic 
100451860Sbostic /*
100551860Sbostic  * Allocate a new buffer header.
100651860Sbostic  */
100752085Sbostic struct buf *
100855940Sbostic lfs_newbuf(vp, daddr, size)
100955940Sbostic 	struct vnode *vp;
101068550Smckusick 	ufs_daddr_t daddr;
101151860Sbostic 	size_t size;
101251860Sbostic {
101352085Sbostic 	struct buf *bp;
101455940Sbostic 	size_t nbytes;
101551342Sbostic 
101655940Sbostic 	nbytes = roundup(size, DEV_BSIZE);
101757072Smargo 	bp = malloc(sizeof(struct buf), M_SEGMENT, M_WAITOK);
101857072Smargo 	bzero(bp, sizeof(struct buf));
101957072Smargo 	if (nbytes)
102064526Sbostic 		bp->b_data = malloc(nbytes, M_SEGMENT, M_WAITOK);
102155940Sbostic 	bgetvp(vp, bp);
102255940Sbostic 	bp->b_bufsize = size;
102355940Sbostic 	bp->b_bcount = size;
102451860Sbostic 	bp->b_lblkno = daddr;
102551860Sbostic 	bp->b_blkno = daddr;
102651860Sbostic 	bp->b_error = 0;
102751860Sbostic 	bp->b_resid = 0;
102855940Sbostic 	bp->b_iodone = lfs_callback;
102956027Sbostic 	bp->b_flags |= B_BUSY | B_CALL | B_NOCACHE;
103051860Sbostic 	return (bp);
103151860Sbostic }
103251342Sbostic 
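/*
 * I/O completion callback for segment writes: decrement the count of
 * outstanding writes, waking up anyone sleeping on it, and release the
 * buffer allocated by lfs_newbuf.
 */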
103353347Sbostic void
103451860Sbostic lfs_callback(bp)
103552085Sbostic 	struct buf *bp;
103651860Sbostic {
103751860Sbostic 	struct lfs *fs;
103851342Sbostic 
103956056Sbostic 	fs = (struct lfs *)bp->b_saveaddr;
104051860Sbostic #ifdef DIAGNOSTIC
104151860Sbostic 	if (fs->lfs_iocount == 0)
104251860Sbostic 		panic("lfs_callback: zero iocount\n");
104351860Sbostic #endif
104451860Sbostic 	if (--fs->lfs_iocount == 0)
104552688Sbostic 		wakeup(&fs->lfs_iocount);
104651915Sbostic 
104755940Sbostic 	brelvp(bp);
104864526Sbostic 	free(bp->b_data, M_SEGMENT);
104955940Sbostic 	free(bp, M_SEGMENT);
105051860Sbostic }
105151342Sbostic 
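/*
 * I/O completion callback for superblock writes: just release the buffer
 * allocated by lfs_writesuper.
 */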
105255940Sbostic void
105355940Sbostic lfs_supercallback(bp)
105455940Sbostic 	struct buf *bp;
105555940Sbostic {
105655940Sbostic 	brelvp(bp);
105764526Sbostic 	free(bp->b_data, M_SEGMENT);
105855940Sbostic 	free(bp, M_SEGMENT);
105955940Sbostic }
106055940Sbostic 
106151215Sbostic /*
106251188Sbostic  * Shellsort (diminishing increment sort) from Data Structures and
106351188Sbostic  * Algorithms, Aho, Hopcroft and Ullman, 1983 Edition, page 290;
106451188Sbostic  * see also Knuth Vol. 3, page 84.  The increments are selected from
106551188Sbostic  * formula (8), page 95.  Roughly O(N^3/2).
106651188Sbostic  */
106751188Sbostic /*
106851188Sbostic  * This is our own private copy of shellsort because we want to sort
106951188Sbostic  * two parallel arrays (the array of buffer pointers and the array of
107051188Sbostic  * logical block numbers) simultaneously.  Note that we cast the array
107151188Sbostic  * of logical block numbers to an unsigned type in this routine so that the
107251188Sbostic  * negative block numbers (meta data blocks) sort AFTER the data blocks.
107351188Sbostic  */
107452077Sbostic void
107552077Sbostic lfs_shellsort(bp_array, lb_array, nmemb)
107652085Sbostic 	struct buf **bp_array;
107768550Smckusick 	ufs_daddr_t *lb_array;
107851188Sbostic 	register int nmemb;
107951188Sbostic {
108051188Sbostic 	static int __rsshell_increments[] = { 4, 1, 0 };
108151188Sbostic 	register int incr, *incrp, t1, t2;
108252085Sbostic 	struct buf *bp_temp;
108351188Sbostic 	u_long lb_temp;
108451188Sbostic 
108551188Sbostic 	for (incrp = __rsshell_increments; incr = *incrp++;)
108651188Sbostic 		for (t1 = incr; t1 < nmemb; ++t1)
108751188Sbostic 			for (t2 = t1 - incr; t2 >= 0;)
108851188Sbostic 				if (lb_array[t2] > lb_array[t2 + incr]) {
108951188Sbostic 					lb_temp = lb_array[t2];
109051188Sbostic 					lb_array[t2] = lb_array[t2 + incr];
109151188Sbostic 					lb_array[t2 + incr] = lb_temp;
109251188Sbostic 					bp_temp = bp_array[t2];
109351188Sbostic 					bp_array[t2] = bp_array[t2 + incr];
109451188Sbostic 					bp_array[t2 + incr] = bp_temp;
109551188Sbostic 					t2 -= incr;
109651188Sbostic 				} else
109751188Sbostic 					break;
109851188Sbostic }
109955940Sbostic 
110057072Smargo /*
110165242Smckusick  * Check VXLOCK.  Return 1 if the vnode is locked.  Otherwise, vget it.
110257072Smargo  */
110357072Smargo lfs_vref(vp)
110457072Smargo 	register struct vnode *vp;
110557072Smargo {
1106*69421Smckusick 	struct proc *p = curproc;	/* XXX */
110757072Smargo 
1108*69421Smckusick 	if (vp->v_flag & VXLOCK)	/* XXX */
110957072Smargo 		return(1);
1110*69421Smckusick 	return (vget(vp, 0, p));
111157072Smargo }
111257072Smargo 
111357072Smargo void
111457072Smargo lfs_vunref(vp)
111557072Smargo 	register struct vnode *vp;
111657072Smargo {
111765242Smckusick 	extern int lfs_no_inactive;
111857072Smargo 
111957072Smargo 	/*
112065242Smckusick 	 * This is vrele except that we do not want to VOP_INACTIVE
112165242Smckusick 	 * this vnode. Rather than inlining vrele here, we use a global
112265242Smckusick 	 * flag to tell lfs_inactive not to run. Yes, it's gross.
112357072Smargo 	 */
112465242Smckusick 	lfs_no_inactive = 1;
112565242Smckusick 	vrele(vp);
112665242Smckusick 	lfs_no_inactive = 0;
112757072Smargo }
1128