xref: /csrg-svn/sys/ufs/lfs/lfs_segment.c (revision 69812)
151188Sbostic /*
263375Sbostic  * Copyright (c) 1991, 1993
363375Sbostic  *	The Regents of the University of California.  All rights reserved.
451188Sbostic  *
551188Sbostic  * %sccs.include.redist.c%
651188Sbostic  *
7*69812Smargo  *	@(#)lfs_segment.c	8.10 (Berkeley) 06/10/95
851188Sbostic  */
951188Sbostic 
1051490Sbostic #include <sys/param.h>
1151490Sbostic #include <sys/systm.h>
1251490Sbostic #include <sys/namei.h>
1352085Sbostic #include <sys/kernel.h>
1451490Sbostic #include <sys/resourcevar.h>
1551490Sbostic #include <sys/file.h>
1651490Sbostic #include <sys/stat.h>
1751490Sbostic #include <sys/buf.h>
1851490Sbostic #include <sys/proc.h>
1951490Sbostic #include <sys/conf.h>
2051490Sbostic #include <sys/vnode.h>
2151490Sbostic #include <sys/malloc.h>
2251490Sbostic #include <sys/mount.h>
2351188Sbostic 
2455033Smckusick #include <miscfs/specfs/specdev.h>
2555033Smckusick #include <miscfs/fifofs/fifo.h>
2655033Smckusick 
2751499Sbostic #include <ufs/ufs/quota.h>
2851499Sbostic #include <ufs/ufs/inode.h>
2951499Sbostic #include <ufs/ufs/dir.h>
3051499Sbostic #include <ufs/ufs/ufsmount.h>
3156478Smargo #include <ufs/ufs/ufs_extern.h>
3251490Sbostic 
3351499Sbostic #include <ufs/lfs/lfs.h>
3451499Sbostic #include <ufs/lfs/lfs_extern.h>
3551490Sbostic 
3657072Smargo extern int count_lock_queue __P((void));
3757072Smargo 
3855940Sbostic #define MAX_ACTIVE	10
3951188Sbostic /*
4051860Sbostic  * Determine if it's OK to start a partial in this segment, or if we need
4151860Sbostic  * to go on to a new segment.
4251301Sbostic  */
4351860Sbostic #define	LFS_PARTIAL_FITS(fs) \
4451860Sbostic 	((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
4551860Sbostic 	1 << (fs)->lfs_fsbtodb)
4651188Sbostic 
4753347Sbostic void	 lfs_callback __P((struct buf *));
4852085Sbostic void	 lfs_gather __P((struct lfs *, struct segment *,
4952085Sbostic 	     struct vnode *, int (*) __P((struct lfs *, struct buf *))));
5055940Sbostic int	 lfs_gatherblock __P((struct segment *, struct buf *, int *));
5168550Smckusick void	 lfs_iset __P((struct inode *, ufs_daddr_t, time_t));
5252085Sbostic int	 lfs_match_data __P((struct lfs *, struct buf *));
5352085Sbostic int	 lfs_match_dindir __P((struct lfs *, struct buf *));
5452085Sbostic int	 lfs_match_indir __P((struct lfs *, struct buf *));
5552085Sbostic int	 lfs_match_tindir __P((struct lfs *, struct buf *));
5652077Sbostic void	 lfs_newseg __P((struct lfs *));
5768550Smckusick void	 lfs_shellsort __P((struct buf **, ufs_daddr_t *, register int));
5855940Sbostic void	 lfs_supercallback __P((struct buf *));
5956027Sbostic void	 lfs_updatemeta __P((struct segment *));
6057072Smargo int	 lfs_vref __P((struct vnode *));
6157072Smargo void	 lfs_vunref __P((struct vnode *));
6252085Sbostic void	 lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
6354264Sbostic int	 lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
6454264Sbostic int	 lfs_writeseg __P((struct lfs *, struct segment *));
6557072Smargo void	 lfs_writesuper __P((struct lfs *));
6654264Sbostic void	 lfs_writevnodes __P((struct lfs *fs, struct mount *mp,
6754264Sbostic 	    struct segment *sp, int dirops));
6851188Sbostic 
6951860Sbostic int	lfs_allclean_wakeup;		/* Cleaner wakeup address. */
7051860Sbostic 
7157072Smargo /* Statistics Counters */
7257072Smargo #define DOSTATS
7357072Smargo struct lfs_stats lfs_stats;
7457072Smargo 
7557072Smargo /* op values to lfs_writevnodes */
7657072Smargo #define	VN_REG	0
7757072Smargo #define	VN_DIROP	1
7857072Smargo #define	VN_EMPTY	2
7957072Smargo 
8052328Sbostic /*
8152328Sbostic  * Ifile and meta data blocks are not marked busy, so segment writes MUST be
8252328Sbostic  * single threaded.  Currently, there are two paths into lfs_segwrite, sync()
8352328Sbostic  * and getnewbuf().  They both mark the file system busy.  Lfs_vflush()
8452328Sbostic  * explicitly marks the file system busy.  So lfs_segwrite is safe.  I think.
8552328Sbostic  */
8652328Sbostic 
8751188Sbostic int
lfs_vflush(vp)8852328Sbostic lfs_vflush(vp)
8952328Sbostic 	struct vnode *vp;
9052328Sbostic {
9152328Sbostic 	struct inode *ip;
9252328Sbostic 	struct lfs *fs;
9352328Sbostic 	struct segment *sp;
9452328Sbostic 
9554690Sbostic 	fs = VFSTOUFS(vp->v_mount)->um_lfs;
9656159Smargo 	if (fs->lfs_nactive > MAX_ACTIVE)
9757072Smargo 		return(lfs_segwrite(vp->v_mount, SEGM_SYNC|SEGM_CKP));
9857072Smargo 	lfs_seglock(fs, SEGM_SYNC);
9957072Smargo 	sp = fs->lfs_sp;
10056159Smargo 
10152328Sbostic 
10257072Smargo 	ip = VTOI(vp);
10365242Smckusick 	if (vp->v_dirtyblkhd.lh_first == NULL)
10457072Smargo 		lfs_writevnodes(fs, vp->v_mount, sp, VN_EMPTY);
10552328Sbostic 
10655551Sbostic 	do {
10755551Sbostic 		do {
10865242Smckusick 			if (vp->v_dirtyblkhd.lh_first != NULL)
10955551Sbostic 				lfs_writefile(fs, sp, vp);
11055551Sbostic 		} while (lfs_writeinode(fs, sp, ip));
11152328Sbostic 
11255551Sbostic 	} while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM);
11352328Sbostic 
11457072Smargo #ifdef DOSTATS
11557072Smargo 	++lfs_stats.nwrites;
11657072Smargo 	if (sp->seg_flags & SEGM_SYNC)
11757072Smargo 		++lfs_stats.nsync_writes;
11857072Smargo 	if (sp->seg_flags & SEGM_CKP)
11957072Smargo 		++lfs_stats.ncheckpoints;
12057072Smargo #endif
12154690Sbostic 	lfs_segunlock(fs);
12252328Sbostic 	return (0);
12352328Sbostic }
12452328Sbostic 
12554264Sbostic void
lfs_writevnodes(fs,mp,sp,op)12657072Smargo lfs_writevnodes(fs, mp, sp, op)
12754264Sbostic 	struct lfs *fs;
12854264Sbostic 	struct mount *mp;
12954264Sbostic 	struct segment *sp;
13057072Smargo 	int op;
13154264Sbostic {
13254264Sbostic 	struct inode *ip;
13354264Sbostic 	struct vnode *vp;
13454264Sbostic 
13569290Smckusick /* BEGIN HACK */
13669290Smckusick #define	VN_OFFSET	(((void *)&vp->v_mntvnodes.le_next) - (void *)vp)
13769290Smckusick #define	BACK_VP(VP)	((struct vnode *)(((void *)VP->v_mntvnodes.le_prev) - VN_OFFSET))
13869290Smckusick #define	BEG_OF_VLIST	((struct vnode *)(((void *)&mp->mnt_vnodelist.lh_first) - VN_OFFSET))
13969290Smckusick 
14069290Smckusick /* Find last vnode. */
14169290Smckusick loop:   for (vp = mp->mnt_vnodelist.lh_first;
14269290Smckusick 	     vp && vp->v_mntvnodes.le_next != NULL;
14369290Smckusick 	     vp = vp->v_mntvnodes.le_next);
14469290Smckusick 	for (; vp && vp != BEG_OF_VLIST; vp = BACK_VP(vp)) {
14569290Smckusick /* END HACK */
14669290Smckusick /*
14765242Smckusick loop:
14865242Smckusick 	for (vp = mp->mnt_vnodelist.lh_first;
14965242Smckusick 	     vp != NULL;
15065242Smckusick 	     vp = vp->v_mntvnodes.le_next) {
15169290Smckusick */
15254264Sbostic 		/*
15354264Sbostic 		 * If the vnode that we are about to sync is no longer
15454264Sbostic 		 * associated with this mount point, start over.
15554264Sbostic 		 */
15654264Sbostic 		if (vp->v_mount != mp)
15754264Sbostic 			goto loop;
15854264Sbostic 
15957072Smargo 		/* XXX ignore dirops for now
16057072Smargo 		if (op == VN_DIROP && !(vp->v_flag & VDIROP) ||
16157072Smargo 		    op != VN_DIROP && (vp->v_flag & VDIROP))
16254264Sbostic 			continue;
16357072Smargo 		*/
16454264Sbostic 
16565242Smckusick 		if (op == VN_EMPTY && vp->v_dirtyblkhd.lh_first)
16657072Smargo 			continue;
16757072Smargo 
16857072Smargo 		if (vp->v_type == VNON)
16957072Smargo 			continue;
17057072Smargo 
17157072Smargo 		if (lfs_vref(vp))
17257072Smargo 			continue;
17357072Smargo 
17454264Sbostic 		/*
17554264Sbostic 		 * Write the inode/file if dirty and it's not the
17654264Sbostic 		 * the IFILE.
17754264Sbostic 		 */
17854264Sbostic 		ip = VTOI(vp);
17964611Sbostic 		if ((ip->i_flag &
18064611Sbostic 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE) ||
18165242Smckusick 		    vp->v_dirtyblkhd.lh_first != NULL) &&
18254264Sbostic 		    ip->i_number != LFS_IFILE_INUM) {
18365242Smckusick 			if (vp->v_dirtyblkhd.lh_first != NULL)
18454264Sbostic 				lfs_writefile(fs, sp, vp);
18554264Sbostic 			(void) lfs_writeinode(fs, sp, ip);
18654264Sbostic 		}
18754264Sbostic 		vp->v_flag &= ~VDIROP;
18857072Smargo 		lfs_vunref(vp);
18954264Sbostic 	}
19054264Sbostic }
19154264Sbostic 
19252328Sbostic int
lfs_segwrite(mp,flags)19357072Smargo lfs_segwrite(mp, flags)
19452085Sbostic 	struct mount *mp;
19557072Smargo 	int flags;			/* Do a checkpoint. */
19651188Sbostic {
19769421Smckusick 	struct proc *p = curproc;	/* XXX */
19855592Sbostic 	struct buf *bp;
19952085Sbostic 	struct inode *ip;
20051499Sbostic 	struct lfs *fs;
20152085Sbostic 	struct segment *sp;
20252085Sbostic 	struct vnode *vp;
20355592Sbostic 	SEGUSE *segusep;
20468550Smckusick 	ufs_daddr_t ibno;
20555940Sbostic 	CLEANERINFO *cip;
20665473Sbostic 	int clean, do_ckp, error, i;
20751188Sbostic 
20852328Sbostic 	fs = VFSTOUFS(mp)->um_lfs;
20955940Sbostic 
21055940Sbostic  	/*
21155940Sbostic  	 * If we have fewer than 2 clean segments, wait until cleaner
21255940Sbostic 	 * writes.
21355940Sbostic  	 */
21455940Sbostic 	do {
21555940Sbostic 		LFS_CLEANERINFO(cip, fs, bp);
21655940Sbostic 		clean = cip->clean;
21755940Sbostic 		brelse(bp);
218*69812Smargo 		if (clean <= 2 || fs->lfs_avail <= 0) {
21969290Smckusick 			/* printf ("segs clean: %d\n", clean); */
22055940Sbostic 			wakeup(&lfs_allclean_wakeup);
221*69812Smargo 			wakeup(&fs->lfs_nextseg);
22255940Sbostic 			if (error = tsleep(&fs->lfs_avail, PRIBIO + 1,
22355940Sbostic 			    "lfs writer", 0))
22455940Sbostic 				return (error);
22555940Sbostic 		}
226*69812Smargo 	} while (clean <= 2 || fs->lfs_avail <= 0);
22752085Sbostic 
22851860Sbostic 	/*
22952328Sbostic 	 * Allocate a segment structure and enough space to hold pointers to
23052328Sbostic 	 * the maximum possible number of buffers which can be described in a
23152328Sbostic 	 * single summary block.
23252328Sbostic 	 */
23357072Smargo 	do_ckp = flags & SEGM_CKP || fs->lfs_nactive > MAX_ACTIVE;
23457072Smargo 	lfs_seglock(fs, flags | (do_ckp ? SEGM_CKP : 0));
23557072Smargo 	sp = fs->lfs_sp;
23652328Sbostic 
23757072Smargo 	lfs_writevnodes(fs, mp, sp, VN_REG);
23851342Sbostic 
23957072Smargo 	/* XXX ignore ordering of dirops for now */
24057072Smargo 	/* XXX
24154264Sbostic 	fs->lfs_writer = 1;
24254264Sbostic 	if (fs->lfs_dirops && (error =
24354264Sbostic 	    tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) {
24454264Sbostic 		free(sp->bpp, M_SEGMENT);
24554264Sbostic 		free(sp, M_SEGMENT);
24654264Sbostic 		fs->lfs_writer = 0;
24755551Sbostic 		return (error);
24854264Sbostic 	}
24951860Sbostic 
25057072Smargo 	lfs_writevnodes(fs, mp, sp, VN_DIROP);
25157072Smargo 	*/
25251860Sbostic 
25354264Sbostic 	/*
25455592Sbostic 	 * If we are doing a checkpoint, mark everything since the
25555592Sbostic 	 * last checkpoint as no longer ACTIVE.
25654264Sbostic 	 */
25755592Sbostic 	if (do_ckp)
25855592Sbostic 		for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz;
25955592Sbostic 		     --ibno >= fs->lfs_cleansz; ) {
26055592Sbostic 			if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize,
26155592Sbostic 			    NOCRED, &bp))
26255592Sbostic 
26355592Sbostic 				panic("lfs: ifile read");
26464526Sbostic 			segusep = (SEGUSE *)bp->b_data;
26555592Sbostic 			for (i = fs->lfs_sepb; i--; segusep++)
26655592Sbostic 				segusep->su_flags &= ~SEGUSE_ACTIVE;
26755592Sbostic 
26855940Sbostic 			error = VOP_BWRITE(bp);
26955592Sbostic 		}
27055592Sbostic 
27154264Sbostic 	if (do_ckp || fs->lfs_doifile) {
27256086Sbostic redo:
27354264Sbostic 		vp = fs->lfs_ivnode;
27469421Smckusick 		while (vget(vp, LK_EXCLUSIVE, p))
27569421Smckusick 			continue;
27652328Sbostic 		ip = VTOI(vp);
27765242Smckusick 		if (vp->v_dirtyblkhd.lh_first != NULL)
27855592Sbostic 			lfs_writefile(fs, sp, vp);
27955592Sbostic 		(void)lfs_writeinode(fs, sp, ip);
28052077Sbostic 		vput(vp);
28157072Smargo 		if (lfs_writeseg(fs, sp) && do_ckp)
28256086Sbostic 			goto redo;
28354264Sbostic 	} else
28454264Sbostic 		(void) lfs_writeseg(fs, sp);
28551342Sbostic 
28651215Sbostic 	/*
28751860Sbostic 	 * If the I/O count is non-zero, sleep until it reaches zero.  At the
28851860Sbostic 	 * moment, the user's process hangs around so we can sleep.
28951215Sbostic 	 */
29057072Smargo 	/* XXX ignore dirops for now
29154264Sbostic 	fs->lfs_writer = 0;
29254264Sbostic 	fs->lfs_doifile = 0;
29354264Sbostic 	wakeup(&fs->lfs_dirops);
29457072Smargo 	*/
29554264Sbostic 
29657072Smargo #ifdef DOSTATS
29757072Smargo 	++lfs_stats.nwrites;
29857072Smargo 	if (sp->seg_flags & SEGM_SYNC)
29957072Smargo 		++lfs_stats.nsync_writes;
30057072Smargo 	if (sp->seg_flags & SEGM_CKP)
30157072Smargo 		++lfs_stats.ncheckpoints;
30257072Smargo #endif
30354690Sbostic 	lfs_segunlock(fs);
30451860Sbostic 	return (0);
30551188Sbostic }
30651188Sbostic 
30751860Sbostic /*
30851860Sbostic  * Write the dirty blocks associated with a vnode.
30951860Sbostic  */
31052077Sbostic void
lfs_writefile(fs,sp,vp)31151860Sbostic lfs_writefile(fs, sp, vp)
31251499Sbostic 	struct lfs *fs;
31352085Sbostic 	struct segment *sp;
31452085Sbostic 	struct vnode *vp;
31551188Sbostic {
31651860Sbostic 	struct buf *bp;
31752085Sbostic 	struct finfo *fip;
31851860Sbostic 	IFILE *ifp;
31951188Sbostic 
32052085Sbostic 	if (sp->seg_bytes_left < fs->lfs_bsize ||
32157072Smargo 	    sp->sum_bytes_left < sizeof(struct finfo))
32254264Sbostic 		(void) lfs_writeseg(fs, sp);
32357072Smargo 
32468550Smckusick 	sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(ufs_daddr_t);
32556478Smargo 	++((SEGSUM *)(sp->segsum))->ss_nfinfo;
32651215Sbostic 
32752085Sbostic 	fip = sp->fip;
32852085Sbostic 	fip->fi_nblocks = 0;
32952085Sbostic 	fip->fi_ino = VTOI(vp)->i_number;
33052085Sbostic 	LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
33152085Sbostic 	fip->fi_version = ifp->if_version;
33252085Sbostic 	brelse(bp);
33351188Sbostic 
33452085Sbostic 	/*
33552085Sbostic 	 * It may not be necessary to write the meta-data blocks at this point,
33652085Sbostic 	 * as the roll-forward recovery code should be able to reconstruct the
33752085Sbostic 	 * list.
33852085Sbostic 	 */
33952085Sbostic 	lfs_gather(fs, sp, vp, lfs_match_data);
34052085Sbostic 	lfs_gather(fs, sp, vp, lfs_match_indir);
34152085Sbostic 	lfs_gather(fs, sp, vp, lfs_match_dindir);
34251860Sbostic #ifdef TRIPLE
34352085Sbostic 	lfs_gather(fs, sp, vp, lfs_match_tindir);
34451860Sbostic #endif
34551342Sbostic 
34652085Sbostic 	fip = sp->fip;
34752085Sbostic 	if (fip->fi_nblocks != 0) {
34852085Sbostic 		sp->fip =
34952085Sbostic 		    (struct finfo *)((caddr_t)fip + sizeof(struct finfo) +
35068550Smckusick 		    sizeof(ufs_daddr_t) * (fip->fi_nblocks - 1));
35155940Sbostic 		sp->start_lbp = &sp->fip->fi_blocks[0];
35256478Smargo 	} else {
35368550Smckusick 		sp->sum_bytes_left += sizeof(struct finfo) - sizeof(ufs_daddr_t);
35456478Smargo 		--((SEGSUM *)(sp->segsum))->ss_nfinfo;
35556478Smargo 	}
35651215Sbostic }
35751215Sbostic 
35854264Sbostic int
lfs_writeinode(fs,sp,ip)35951915Sbostic lfs_writeinode(fs, sp, ip)
36051915Sbostic 	struct lfs *fs;
36152085Sbostic 	struct segment *sp;
36252085Sbostic 	struct inode *ip;
36351915Sbostic {
36452085Sbostic 	struct buf *bp, *ibp;
36552077Sbostic 	IFILE *ifp;
36652682Sstaelin 	SEGUSE *sup;
36768550Smckusick 	ufs_daddr_t daddr;
36852077Sbostic 	ino_t ino;
36957072Smargo 	int error, i, ndx;
37054264Sbostic 	int redo_ifile = 0;
37151915Sbostic 
37264611Sbostic 	if (!(ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)))
37356190Smargo 		return(0);
37455940Sbostic 
37551915Sbostic 	/* Allocate a new inode block if necessary. */
37651915Sbostic 	if (sp->ibp == NULL) {
37751915Sbostic 		/* Allocate a new segment if necessary. */
37851915Sbostic 		if (sp->seg_bytes_left < fs->lfs_bsize ||
37968550Smckusick 		    sp->sum_bytes_left < sizeof(ufs_daddr_t))
38054264Sbostic 			(void) lfs_writeseg(fs, sp);
38151915Sbostic 
38251915Sbostic 		/* Get next inode block. */
38352682Sstaelin 		daddr = fs->lfs_offset;
38451915Sbostic 		fs->lfs_offset += fsbtodb(fs, 1);
38551915Sbostic 		sp->ibp = *sp->cbpp++ =
38656056Sbostic 		    lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, daddr,
38756056Sbostic 		    fs->lfs_bsize);
38857072Smargo 		/* Zero out inode numbers */
38957072Smargo 		for (i = 0; i < INOPB(fs); ++i)
39064526Sbostic 			((struct dinode *)sp->ibp->b_data)[i].di_inumber = 0;
39155940Sbostic 		++sp->start_bpp;
39255940Sbostic 		fs->lfs_avail -= fsbtodb(fs, 1);
39352688Sbostic 		/* Set remaining space counters. */
39451915Sbostic 		sp->seg_bytes_left -= fs->lfs_bsize;
39568550Smckusick 		sp->sum_bytes_left -= sizeof(ufs_daddr_t);
39668550Smckusick 		ndx = LFS_SUMMARY_SIZE / sizeof(ufs_daddr_t) -
39751915Sbostic 		    sp->ninodes / INOPB(fs) - 1;
39868550Smckusick 		((ufs_daddr_t *)(sp->segsum))[ndx] = daddr;
39951915Sbostic 	}
40051915Sbostic 
40152085Sbostic 	/* Update the inode times and copy the inode onto the inode page. */
40264611Sbostic 	if (ip->i_flag & IN_MODIFIED)
40356056Sbostic 		--fs->lfs_uinodes;
40452077Sbostic 	ITIMES(ip, &time, &time);
40564611Sbostic 	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
40651915Sbostic 	bp = sp->ibp;
40764526Sbostic 	((struct dinode *)bp->b_data)[sp->ninodes % INOPB(fs)] = ip->i_din;
40851915Sbostic 	/* Increment inode count in segment summary block. */
40951915Sbostic 	++((SEGSUM *)(sp->segsum))->ss_ninos;
41051915Sbostic 
41151915Sbostic 	/* If this page is full, set flag to allocate a new page. */
41251915Sbostic 	if (++sp->ninodes % INOPB(fs) == 0)
41351915Sbostic 		sp->ibp = NULL;
41451915Sbostic 
41551915Sbostic 	/*
41652077Sbostic 	 * If updating the ifile, update the super-block.  Update the disk
41752077Sbostic 	 * address and access times for this inode in the ifile.
41851915Sbostic 	 */
41952077Sbostic 	ino = ip->i_number;
42055696Sbostic 	if (ino == LFS_IFILE_INUM) {
42155696Sbostic 		daddr = fs->lfs_idaddr;
42251915Sbostic 		fs->lfs_idaddr = bp->b_blkno;
42355696Sbostic 	} else {
42455696Sbostic 		LFS_IENTRY(ifp, fs, ino, ibp);
42555696Sbostic 		daddr = ifp->if_daddr;
42655696Sbostic 		ifp->if_daddr = bp->b_blkno;
42755940Sbostic 		error = VOP_BWRITE(ibp);
42855696Sbostic 	}
42952077Sbostic 
43054264Sbostic 	/*
43154264Sbostic 	 * No need to update segment usage if there was no former inode address
43254264Sbostic 	 * or if the last inode address is in the current partial segment.
43354264Sbostic 	 */
43454264Sbostic 	if (daddr != LFS_UNUSED_DADDR &&
43555803Sbostic 	    !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) {
43652682Sstaelin 		LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
43752682Sstaelin #ifdef DIAGNOSTIC
43854264Sbostic 		if (sup->su_nbytes < sizeof(struct dinode)) {
43952819Sbostic 			/* XXX -- Change to a panic. */
44052819Sbostic 			printf("lfs: negative bytes (segment %d)\n",
44152682Sstaelin 			    datosn(fs, daddr));
44254264Sbostic 			panic("negative bytes");
44354264Sbostic 		}
44452682Sstaelin #endif
44552682Sstaelin 		sup->su_nbytes -= sizeof(struct dinode);
44656069Sbostic 		redo_ifile =
44756069Sbostic 		    (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
44855940Sbostic 		error = VOP_BWRITE(bp);
44952682Sstaelin 	}
45055551Sbostic 	return (redo_ifile);
45151915Sbostic }
45251915Sbostic 
45355940Sbostic int
lfs_gatherblock(sp,bp,sptr)45455940Sbostic lfs_gatherblock(sp, bp, sptr)
45555940Sbostic 	struct segment *sp;
45655940Sbostic 	struct buf *bp;
45755940Sbostic 	int *sptr;
45855940Sbostic {
45955940Sbostic 	struct lfs *fs;
46055940Sbostic 	int version;
46155940Sbostic 
46255940Sbostic 	/*
46355940Sbostic 	 * If full, finish this segment.  We may be doing I/O, so
46455940Sbostic 	 * release and reacquire the splbio().
46555940Sbostic 	 */
46656027Sbostic #ifdef DIAGNOSTIC
46756027Sbostic 	if (sp->vp == NULL)
46856027Sbostic 		panic ("lfs_gatherblock: Null vp in segment");
46956027Sbostic #endif
47055940Sbostic 	fs = sp->fs;
47168550Smckusick 	if (sp->sum_bytes_left < sizeof(ufs_daddr_t) ||
47269290Smckusick 	    sp->seg_bytes_left < bp->b_bcount) {
47355940Sbostic 		if (sptr)
47455940Sbostic 			splx(*sptr);
47556027Sbostic 		lfs_updatemeta(sp);
47655940Sbostic 
47755940Sbostic 		version = sp->fip->fi_version;
47855940Sbostic 		(void) lfs_writeseg(fs, sp);
47955940Sbostic 
48055940Sbostic 		sp->fip->fi_version = version;
48156027Sbostic 		sp->fip->fi_ino = VTOI(sp->vp)->i_number;
48256478Smargo 		/* Add the current file to the segment summary. */
48356478Smargo 		++((SEGSUM *)(sp->segsum))->ss_nfinfo;
48455940Sbostic 		sp->sum_bytes_left -=
48568550Smckusick 		    sizeof(struct finfo) - sizeof(ufs_daddr_t);
48655940Sbostic 
48755940Sbostic 		if (sptr)
48855940Sbostic 			*sptr = splbio();
48955940Sbostic 		return(1);
49055940Sbostic 	}
49155940Sbostic 
49255940Sbostic 	/* Insert into the buffer list, update the FINFO block. */
49355940Sbostic 	bp->b_flags |= B_GATHERED;
49455940Sbostic 	*sp->cbpp++ = bp;
49555940Sbostic 	sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno;
49655940Sbostic 
49768550Smckusick 	sp->sum_bytes_left -= sizeof(ufs_daddr_t);
49869290Smckusick 	sp->seg_bytes_left -= bp->b_bcount;
49955940Sbostic 	return(0);
50055940Sbostic }
50155940Sbostic 
50252077Sbostic void
lfs_gather(fs,sp,vp,match)50351215Sbostic lfs_gather(fs, sp, vp, match)
50451499Sbostic 	struct lfs *fs;
50552085Sbostic 	struct segment *sp;
50652085Sbostic 	struct vnode *vp;
50752085Sbostic 	int (*match) __P((struct lfs *, struct buf *));
50851215Sbostic {
50955940Sbostic 	struct buf *bp;
51051342Sbostic 	int s;
51151215Sbostic 
51256027Sbostic 	sp->vp = vp;
51355940Sbostic 	s = splbio();
51469290Smckusick /* This is a hack to see if ordering the blocks in LFS makes a difference. */
51569290Smckusick /* BEGIN HACK */
51669290Smckusick #define	BUF_OFFSET	(((void *)&bp->b_vnbufs.le_next) - (void *)bp)
51769290Smckusick #define	BACK_BUF(BP)	((struct buf *)(((void *)BP->b_vnbufs.le_prev) - BUF_OFFSET))
51869290Smckusick #define	BEG_OF_LIST	((struct buf *)(((void *)&vp->v_dirtyblkhd.lh_first) - BUF_OFFSET))
51969290Smckusick 
52069290Smckusick 
52169290Smckusick /*loop:	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) {*/
52269290Smckusick /* Find last buffer. */
52369290Smckusick loop:   for (bp = vp->v_dirtyblkhd.lh_first; bp && bp->b_vnbufs.le_next != NULL;
52469290Smckusick 	    bp = bp->b_vnbufs.le_next);
52569290Smckusick 	for (; bp && bp != BEG_OF_LIST; bp = BACK_BUF(bp)) {
52669290Smckusick /* END HACK */
52754264Sbostic 		if (bp->b_flags & B_BUSY || !match(fs, bp) ||
52854264Sbostic 		    bp->b_flags & B_GATHERED)
52951215Sbostic 			continue;
53051342Sbostic #ifdef DIAGNOSTIC
53151860Sbostic 		if (!(bp->b_flags & B_DELWRI))
53251915Sbostic 			panic("lfs_gather: bp not B_DELWRI");
53351860Sbostic 		if (!(bp->b_flags & B_LOCKED))
53451915Sbostic 			panic("lfs_gather: bp not B_LOCKED");
53551342Sbostic #endif
53655940Sbostic 		if (lfs_gatherblock(sp, bp, &s))
53753145Sstaelin 			goto loop;
53851188Sbostic 	}
53951215Sbostic 	splx(s);
54056027Sbostic 	lfs_updatemeta(sp);
54156027Sbostic 	sp->vp = NULL;
54251188Sbostic }
54351188Sbostic 
54455940Sbostic 
54551342Sbostic /*
54651342Sbostic  * Update the metadata that points to the blocks listed in the FINFO
54751188Sbostic  * array.
54851188Sbostic  */
54952077Sbostic void
lfs_updatemeta(sp)55056027Sbostic lfs_updatemeta(sp)
55152085Sbostic 	struct segment *sp;
55251188Sbostic {
55351915Sbostic 	SEGUSE *sup;
55452085Sbostic 	struct buf *bp;
55555940Sbostic 	struct lfs *fs;
55656027Sbostic 	struct vnode *vp;
55756478Smargo 	struct indir a[NIADDR + 2], *ap;
55852085Sbostic 	struct inode *ip;
55968550Smckusick 	ufs_daddr_t daddr, lbn, off;
56069290Smckusick 	int error, i, nblocks, num;
56151188Sbostic 
56256027Sbostic 	vp = sp->vp;
56355940Sbostic 	nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp;
56469290Smckusick 	if (nblocks < 0)
56569290Smckusick 		panic("This is a bad thing\n");
56656027Sbostic 	if (vp == NULL || nblocks == 0)
56751215Sbostic 		return;
56851215Sbostic 
56951915Sbostic 	/* Sort the blocks. */
57055940Sbostic 	if (!(sp->seg_flags & SEGM_CLEAN))
57155940Sbostic 		lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks);
57251215Sbostic 
57351915Sbostic 	/*
57469290Smckusick 	 * Record the length of the last block in case it's a fragment.
57569290Smckusick 	 * If there are indirect blocks present, they sort last.  An
57669290Smckusick 	 * indirect block will be lfs_bsize and its presence indicates
57769290Smckusick 	 * that you cannot have fragments.
57869290Smckusick 	 */
57969290Smckusick 	sp->fip->fi_lastlength = sp->start_bpp[nblocks - 1]->b_bcount;
58069290Smckusick 
58169290Smckusick 	/*
58251915Sbostic 	 * Assign disk addresses, and update references to the logical
58351915Sbostic 	 * block and the segment usage information.
58451915Sbostic 	 */
58555940Sbostic 	fs = sp->fs;
58655940Sbostic 	for (i = nblocks; i--; ++sp->start_bpp) {
58755940Sbostic 		lbn = *sp->start_lbp++;
58855940Sbostic 		(*sp->start_bpp)->b_blkno = off = fs->lfs_offset;
58969290Smckusick 		fs->lfs_offset +=
59069290Smckusick 		    fragstodb(fs, numfrags(fs, (*sp->start_bpp)->b_bcount));
59151215Sbostic 
59256478Smargo 		if (error = ufs_bmaparray(vp, lbn, &daddr, a, &num, NULL))
59356478Smargo 			panic("lfs_updatemeta: ufs_bmaparray %d", error);
59451860Sbostic 		ip = VTOI(vp);
59551860Sbostic 		switch (num) {
59651860Sbostic 		case 0:
59751915Sbostic 			ip->i_db[lbn] = off;
59851860Sbostic 			break;
59951860Sbostic 		case 1:
60051915Sbostic 			ip->i_ib[a[0].in_off] = off;
60151860Sbostic 			break;
60251860Sbostic 		default:
60351860Sbostic 			ap = &a[num - 1];
60451860Sbostic 			if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
60551860Sbostic 				panic("lfs_updatemeta: bread bno %d",
60651860Sbostic 				    ap->in_lbn);
60755458Sbostic 			/*
60855458Sbostic 			 * Bread may create a new indirect block which needs
60955458Sbostic 			 * to get counted for the inode.
61055458Sbostic 			 */
61155592Sbostic 			if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) {
61269290Smckusick 				ip->i_blocks += fsbtodb(fs, 1);
61369290Smckusick 				fs->lfs_bfree -= fragstodb(fs, fs->lfs_frag);
61455592Sbostic 			}
61568550Smckusick 			((ufs_daddr_t *)bp->b_data)[ap->in_off] = off;
61653530Sheideman 			VOP_BWRITE(bp);
61751188Sbostic 		}
61851915Sbostic 
61951915Sbostic 		/* Update segment usage information. */
62057072Smargo 		if (daddr != UNASSIGNED &&
62157072Smargo 		    !(daddr >= fs->lfs_lastpseg && daddr <= off)) {
62251915Sbostic 			LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
62351915Sbostic #ifdef DIAGNOSTIC
62469290Smckusick 			if (sup->su_nbytes < (*sp->start_bpp)->b_bcount) {
62552819Sbostic 				/* XXX -- Change to a panic. */
62652819Sbostic 				printf("lfs: negative bytes (segment %d)\n",
62751915Sbostic 				    datosn(fs, daddr));
628*69812Smargo 				printf("lfs: bp = 0x%x, addr = 0x%x\n",
629*69812Smargo 						bp, bp->b_un.b_addr);
63054264Sbostic 				panic ("Negative Bytes");
63154264Sbostic 			}
63251915Sbostic #endif
63369290Smckusick 			sup->su_nbytes -= (*sp->start_bpp)->b_bcount;
63455940Sbostic 			error = VOP_BWRITE(bp);
63551915Sbostic 		}
63651188Sbostic 	}
63751188Sbostic }
63851188Sbostic 
63951915Sbostic /*
64051915Sbostic  * Start a new segment.
64151915Sbostic  */
64257072Smargo int
lfs_initseg(fs)64357072Smargo lfs_initseg(fs)
64451499Sbostic 	struct lfs *fs;
64557072Smargo {
64652085Sbostic 	struct segment *sp;
64751915Sbostic 	SEGUSE *sup;
64851915Sbostic 	SEGSUM *ssp;
64951915Sbostic 	struct buf *bp;
65057072Smargo 	int repeat;
65151215Sbostic 
65257072Smargo 	sp = fs->lfs_sp;
65357072Smargo 
65457072Smargo 	repeat = 0;
65551915Sbostic 	/* Advance to the next segment. */
65651927Sbostic 	if (!LFS_PARTIAL_FITS(fs)) {
65752682Sstaelin 		/* Wake up any cleaning procs waiting on this file system. */
65852688Sbostic 		wakeup(&lfs_allclean_wakeup);
659*69812Smargo 		wakeup(&fs->lfs_nextseg);
66052682Sstaelin 
66151927Sbostic 		lfs_newseg(fs);
66257072Smargo 		repeat = 1;
66351927Sbostic 		fs->lfs_offset = fs->lfs_curseg;
66451915Sbostic 		sp->seg_number = datosn(fs, fs->lfs_curseg);
66551915Sbostic 		sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;
66651915Sbostic 
66751915Sbostic 		/*
66851927Sbostic 		 * If the segment contains a superblock, update the offset
66951927Sbostic 		 * and summary address to skip over it.
67051915Sbostic 		 */
67152077Sbostic 		LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
67251927Sbostic 		if (sup->su_flags & SEGUSE_SUPERBLOCK) {
67351915Sbostic 			fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
67451915Sbostic 			sp->seg_bytes_left -= LFS_SBPAD;
67551215Sbostic 		}
67652085Sbostic 		brelse(bp);
67751915Sbostic 	} else {
67851915Sbostic 		sp->seg_number = datosn(fs, fs->lfs_curseg);
67951915Sbostic 		sp->seg_bytes_left = (fs->lfs_dbpseg -
68051915Sbostic 		    (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
68151915Sbostic 	}
68254264Sbostic 	fs->lfs_lastpseg = fs->lfs_offset;
68351342Sbostic 
68455940Sbostic 	sp->fs = fs;
68551915Sbostic 	sp->ibp = NULL;
68651915Sbostic 	sp->ninodes = 0;
68751342Sbostic 
68851915Sbostic 	/* Get a new buffer for SEGSUM and enter it into the buffer list. */
68951915Sbostic 	sp->cbpp = sp->bpp;
69056056Sbostic 	*sp->cbpp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_offset,
69156056Sbostic 	     LFS_SUMMARY_SIZE);
69264526Sbostic 	sp->segsum = (*sp->cbpp)->b_data;
69357072Smargo 	bzero(sp->segsum, LFS_SUMMARY_SIZE);
69455940Sbostic 	sp->start_bpp = ++sp->cbpp;
69551915Sbostic 	fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;
69651342Sbostic 
69751915Sbostic 	/* Set point to SEGSUM, initialize it. */
69851915Sbostic 	ssp = sp->segsum;
69951915Sbostic 	ssp->ss_next = fs->lfs_nextseg;
70051915Sbostic 	ssp->ss_nfinfo = ssp->ss_ninos = 0;
70169290Smckusick 	ssp->ss_magic = SS_MAGIC;
70251342Sbostic 
70351915Sbostic 	/* Set pointer to first FINFO, initialize it. */
70468119Smckusick 	sp->fip = (struct finfo *)((caddr_t)sp->segsum + sizeof(SEGSUM));
70551915Sbostic 	sp->fip->fi_nblocks = 0;
70655940Sbostic 	sp->start_lbp = &sp->fip->fi_blocks[0];
70769290Smckusick 	sp->fip->fi_lastlength = 0;
70851342Sbostic 
70951915Sbostic 	sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
71051915Sbostic 	sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);
71157072Smargo 
71257072Smargo 	return(repeat);
71351915Sbostic }
71451342Sbostic 
71551915Sbostic /*
71651915Sbostic  * Return the next segment to write.
71751915Sbostic  */
71852077Sbostic void
lfs_newseg(fs)71951915Sbostic lfs_newseg(fs)
72051915Sbostic 	struct lfs *fs;
72151915Sbostic {
72251927Sbostic 	CLEANERINFO *cip;
72351915Sbostic 	SEGUSE *sup;
72451915Sbostic 	struct buf *bp;
72565473Sbostic 	int curseg, isdirty, sn;
72651915Sbostic 
72755592Sbostic         LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
72856159Smargo         sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
72956056Sbostic 	sup->su_nbytes = 0;
73056056Sbostic 	sup->su_nsums = 0;
73156056Sbostic 	sup->su_ninos = 0;
73255940Sbostic         (void) VOP_BWRITE(bp);
73351927Sbostic 
73451927Sbostic 	LFS_CLEANERINFO(cip, fs, bp);
73551927Sbostic 	--cip->clean;
73651927Sbostic 	++cip->dirty;
73755940Sbostic 	(void) VOP_BWRITE(bp);
73851927Sbostic 
73951927Sbostic 	fs->lfs_lastseg = fs->lfs_curseg;
74051927Sbostic 	fs->lfs_curseg = fs->lfs_nextseg;
74151927Sbostic 	for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
74251915Sbostic 		sn = (sn + 1) % fs->lfs_nseg;
74351927Sbostic 		if (sn == curseg)
74451915Sbostic 			panic("lfs_nextseg: no clean segments");
74551915Sbostic 		LFS_SEGENTRY(sup, fs, sn, bp);
74651915Sbostic 		isdirty = sup->su_flags & SEGUSE_DIRTY;
74752085Sbostic 		brelse(bp);
74851915Sbostic 		if (!isdirty)
74951915Sbostic 			break;
75051915Sbostic 	}
75155592Sbostic 
75255940Sbostic 	++fs->lfs_nactive;
75351927Sbostic 	fs->lfs_nextseg = sntoda(fs, sn);
75457072Smargo #ifdef DOSTATS
75557072Smargo 	++lfs_stats.segsused;
75657072Smargo #endif
75751188Sbostic }
75851188Sbostic 
75954264Sbostic int
lfs_writeseg(fs,sp)76051188Sbostic lfs_writeseg(fs, sp)
76151499Sbostic 	struct lfs *fs;
76252085Sbostic 	struct segment *sp;
76351188Sbostic {
76455940Sbostic 	extern int locked_queue_count;
76552688Sbostic 	struct buf **bpp, *bp, *cbp;
76651188Sbostic 	SEGUSE *sup;
76752085Sbostic 	SEGSUM *ssp;
76851860Sbostic 	dev_t i_dev;
76951860Sbostic 	u_long *datap, *dp;
77069290Smckusick 	int do_again, i, nblocks, s;
77154264Sbostic 	int (*strategy)__P((struct vop_strategy_args *));
77254690Sbostic 	struct vop_strategy_args vop_strategy_a;
77355592Sbostic 	u_short ninos;
77452688Sbostic 	char *p;
77551188Sbostic 
77655940Sbostic 	/*
77755940Sbostic 	 * If there are no buffers other than the segment summary to write
77855940Sbostic 	 * and it is not a checkpoint, don't do anything.  On a checkpoint,
77955940Sbostic 	 * even if there aren't any buffers, you need to write the superblock.
78055940Sbostic 	 */
78157072Smargo 	if ((nblocks = sp->cbpp - sp->bpp) == 1)
78255551Sbostic 		return (0);
78352085Sbostic 
78469290Smckusick 	/* Update the segment usage information. */
78569290Smckusick 	LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
78669290Smckusick 
78769290Smckusick 	/* Loop through all blocks, except the segment summary. */
78869290Smckusick 	for (bpp = sp->bpp; ++bpp < sp->cbpp; )
78969290Smckusick 		sup->su_nbytes += (*bpp)->b_bcount;
79069290Smckusick 
79156159Smargo 	ssp = (SEGSUM *)sp->segsum;
79256159Smargo 
79356159Smargo 	ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs);
79456159Smargo 	sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode);
79556159Smargo 	sup->su_nbytes += LFS_SUMMARY_SIZE;
79656159Smargo 	sup->su_lastmod = time.tv_sec;
79756159Smargo 	sup->su_ninos += ninos;
79856159Smargo 	++sup->su_nsums;
79956159Smargo 	do_again = !(bp->b_flags & B_GATHERED);
80056159Smargo 	(void)VOP_BWRITE(bp);
80151188Sbostic 	/*
80252085Sbostic 	 * Compute checksum across data and then across summary; the first
80352085Sbostic 	 * block (the summary block) is skipped.  Set the create time here
80452085Sbostic 	 * so that it's guaranteed to be later than the inode mod times.
80551860Sbostic 	 *
80651860Sbostic 	 * XXX
80751860Sbostic 	 * Fix this to do it inline, instead of malloc/copy.
80851188Sbostic 	 */
80951860Sbostic 	datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
81056159Smargo 	for (bpp = sp->bpp, i = nblocks - 1; i--;) {
81156159Smargo 		if ((*++bpp)->b_flags & B_INVAL) {
81256159Smargo 			if (copyin((*bpp)->b_saveaddr, dp++, sizeof(u_long)))
81356159Smargo 				panic("lfs_writeseg: copyin failed");
81456159Smargo 		} else
81564526Sbostic 			*dp++ = ((u_long *)(*bpp)->b_data)[0];
81656159Smargo 	}
81752103Sbostic 	ssp->ss_create = time.tv_sec;
81855803Sbostic 	ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long));
81952085Sbostic 	ssp->ss_sumsum =
82052085Sbostic 	    cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
82151927Sbostic 	free(datap, M_SEGMENT);
82256159Smargo #ifdef DIAGNOSTIC
82356159Smargo 	if (fs->lfs_bfree < fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE)
82456159Smargo 		panic("lfs_writeseg: No diskspace for summary");
82556159Smargo #endif
82655592Sbostic 	fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE);
82754264Sbostic 
82851860Sbostic 	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
82953574Sheideman 	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];
83051301Sbostic 
83152688Sbostic 	/*
83252688Sbostic 	 * When we simply write the blocks we lose a rotation for every block
83352688Sbostic 	 * written.  To avoid this problem, we allocate memory in chunks, copy
83457072Smargo 	 * the buffers into the chunk and write the chunk.  MAXPHYS is the
83557072Smargo 	 * largest size I/O devices can handle.
83652688Sbostic 	 * When the data is copied to the chunk, turn off the the B_LOCKED bit
83752688Sbostic 	 * and brelse the buffer (which will move them to the LRU list).  Add
83852688Sbostic 	 * the B_CALL flag to the buffer header so we can count I/O's for the
83952688Sbostic 	 * checkpoints and so we can release the allocated memory.
84052688Sbostic 	 *
84152688Sbostic 	 * XXX
84252688Sbostic 	 * This should be removed if the new virtual memory system allows us to
84352688Sbostic 	 * easily make the buffers contiguous in kernel memory and if that's
84452688Sbostic 	 * fast enough.
84552688Sbostic 	 */
84652688Sbostic 	for (bpp = sp->bpp, i = nblocks; i;) {
84756056Sbostic 		cbp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp,
84869290Smckusick 		    (*bpp)->b_blkno, MAXPHYS);
84952688Sbostic 		cbp->b_dev = i_dev;
85055940Sbostic 		cbp->b_flags |= B_ASYNC | B_BUSY;
85169290Smckusick 		cbp->b_bcount = 0;
85252688Sbostic 
85352688Sbostic 		s = splbio();
85452688Sbostic 		++fs->lfs_iocount;
85569290Smckusick 		for (p = cbp->b_data; i && cbp->b_bcount < MAXPHYS; i--) {
85669290Smckusick 			bp = *bpp;
85769290Smckusick 			if (bp->b_bcount > (MAXPHYS - cbp->b_bcount))
85869290Smckusick 				break;
85969290Smckusick 			bpp++;
86069290Smckusick 
86155940Sbostic 			/*
86255940Sbostic 			 * Fake buffers from the cleaner are marked as B_INVAL.
86355940Sbostic 			 * We need to copy the data from user space rather than
86455940Sbostic 			 * from the buffer indicated.
86555940Sbostic 			 * XXX == what do I do on an error?
86655940Sbostic 			 */
86755940Sbostic 			if (bp->b_flags & B_INVAL) {
86855940Sbostic 				if (copyin(bp->b_saveaddr, p, bp->b_bcount))
86955940Sbostic 					panic("lfs_writeseg: copyin failed");
87055940Sbostic 			} else
87164526Sbostic 				bcopy(bp->b_data, p, bp->b_bcount);
87252688Sbostic 			p += bp->b_bcount;
87369290Smckusick 			cbp->b_bcount += bp->b_bcount;
87455940Sbostic 			if (bp->b_flags & B_LOCKED)
87555940Sbostic 				--locked_queue_count;
87655940Sbostic 			bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
87754264Sbostic 			     B_LOCKED | B_GATHERED);
87855940Sbostic 			if (bp->b_flags & B_CALL) {
87955940Sbostic 				/* if B_CALL, it was created with newbuf */
88055940Sbostic 				brelvp(bp);
88157072Smargo 				if (!(bp->b_flags & B_INVAL))
88264526Sbostic 					free(bp->b_data, M_SEGMENT);
88355940Sbostic 				free(bp, M_SEGMENT);
88455940Sbostic 			} else {
88552688Sbostic 				bremfree(bp);
88656478Smargo 				bp->b_flags |= B_DONE;
88752688Sbostic 				reassignbuf(bp, bp->b_vp);
88855940Sbostic 				brelse(bp);
88952688Sbostic 			}
89051860Sbostic 		}
89156069Sbostic 		++cbp->b_vp->v_numoutput;
89252688Sbostic 		splx(s);
89356056Sbostic 		/*
89456056Sbostic 		 * XXXX This is a gross and disgusting hack.  Since these
89556056Sbostic 		 * buffers are physically addressed, they hang off the
89656056Sbostic 		 * device vnode (devvp).  As a result, they have no way
89756056Sbostic 		 * of getting to the LFS superblock or lfs structure to
89856056Sbostic 		 * keep track of the number of I/O's pending.  So, I am
89956056Sbostic 		 * going to stuff the fs into the saveaddr field of
90056056Sbostic 		 * the buffer (yuk).
90156056Sbostic 		 */
90256056Sbostic 		cbp->b_saveaddr = (caddr_t)fs;
90353574Sheideman 		vop_strategy_a.a_desc = VDESC(vop_strategy);
90453574Sheideman 		vop_strategy_a.a_bp = cbp;
90553574Sheideman 		(strategy)(&vop_strategy_a);
90651860Sbostic 	}
90757072Smargo 	/*
90857072Smargo 	 * XXX
90957072Smargo 	 * Vinvalbuf can move locked buffers off the locked queue
91057072Smargo 	 * and we have no way of knowing about this.  So, after
91157072Smargo 	 * doing a big write, we recalculate how many bufers are
91257072Smargo 	 * really still left on the locked queue.
91357072Smargo 	 */
91457072Smargo 	locked_queue_count = count_lock_queue();
91557072Smargo 	wakeup(&locked_queue_count);
91657072Smargo #ifdef DOSTATS
91757072Smargo 	++lfs_stats.psegwrites;
91857072Smargo 	lfs_stats.blocktot += nblocks - 1;
91957072Smargo 	if (fs->lfs_sp->seg_flags & SEGM_SYNC)
92057072Smargo 		++lfs_stats.psyncwrites;
92157072Smargo 	if (fs->lfs_sp->seg_flags & SEGM_CLEAN) {
92257072Smargo 		++lfs_stats.pcleanwrites;
92357072Smargo 		lfs_stats.cleanblocks += nblocks - 1;
92457072Smargo 	}
92557072Smargo #endif
92657072Smargo 	return (lfs_initseg(fs) || do_again);
92751188Sbostic }
92851188Sbostic 
92952077Sbostic void
lfs_writesuper(fs)93057072Smargo lfs_writesuper(fs)
93151499Sbostic 	struct lfs *fs;
93251301Sbostic {
93352085Sbostic 	struct buf *bp;
93451860Sbostic 	dev_t i_dev;
93553574Sheideman 	int (*strategy) __P((struct vop_strategy_args *));
93656069Sbostic 	int s;
93754690Sbostic 	struct vop_strategy_args vop_strategy_a;
93851301Sbostic 
93951860Sbostic 	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
94053574Sheideman 	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];
94151356Sbostic 
94251342Sbostic 	/* Checksum the superblock and copy it into a buffer. */
94351499Sbostic 	fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
94456056Sbostic 	bp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_sboffs[0],
94556056Sbostic 	    LFS_SBPAD);
94664526Sbostic 	*(struct lfs *)bp->b_data = *fs;
94751215Sbostic 
94857072Smargo 	/* XXX Toggle between first two superblocks; for now just write first */
94951860Sbostic 	bp->b_dev = i_dev;
95057072Smargo 	bp->b_flags |= B_BUSY | B_CALL | B_ASYNC;
95157072Smargo 	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
95257072Smargo 	bp->b_iodone = lfs_supercallback;
95353574Sheideman 	vop_strategy_a.a_desc = VDESC(vop_strategy);
95453574Sheideman 	vop_strategy_a.a_bp = bp;
95556069Sbostic 	s = splbio();
95657072Smargo 	++bp->b_vp->v_numoutput;
95756069Sbostic 	splx(s);
95853574Sheideman 	(strategy)(&vop_strategy_a);
95951215Sbostic }
96051215Sbostic 
96151342Sbostic /*
96251342Sbostic  * Logical block number match routines used when traversing the dirty block
96351342Sbostic  * chain.
96451342Sbostic  */
96552077Sbostic int
lfs_match_data(fs,bp)96652077Sbostic lfs_match_data(fs, bp)
96751860Sbostic 	struct lfs *fs;
96852085Sbostic 	struct buf *bp;
96951215Sbostic {
97051342Sbostic 	return (bp->b_lblkno >= 0);
97151215Sbostic }
97251215Sbostic 
97352077Sbostic int
lfs_match_indir(fs,bp)97452077Sbostic lfs_match_indir(fs, bp)
97551860Sbostic 	struct lfs *fs;
97652085Sbostic 	struct buf *bp;
97751215Sbostic {
97851860Sbostic 	int lbn;
97951860Sbostic 
98051860Sbostic 	lbn = bp->b_lblkno;
98151860Sbostic 	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
98251215Sbostic }
98351215Sbostic 
98452077Sbostic int
lfs_match_dindir(fs,bp)98552077Sbostic lfs_match_dindir(fs, bp)
98651860Sbostic 	struct lfs *fs;
98752085Sbostic 	struct buf *bp;
98851215Sbostic {
98951860Sbostic 	int lbn;
99051860Sbostic 
99151860Sbostic 	lbn = bp->b_lblkno;
99251860Sbostic 	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
99351215Sbostic }
99451215Sbostic 
99552077Sbostic int
lfs_match_tindir(fs,bp)99652077Sbostic lfs_match_tindir(fs, bp)
99751499Sbostic 	struct lfs *fs;
99852085Sbostic 	struct buf *bp;
99951342Sbostic {
100051860Sbostic 	int lbn;
100151342Sbostic 
100251860Sbostic 	lbn = bp->b_lblkno;
100351860Sbostic 	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
100451860Sbostic }
100551342Sbostic 
100651860Sbostic /*
100751860Sbostic  * Allocate a new buffer header.
100851860Sbostic  */
100952085Sbostic struct buf *
lfs_newbuf(vp,daddr,size)101055940Sbostic lfs_newbuf(vp, daddr, size)
101155940Sbostic 	struct vnode *vp;
101268550Smckusick 	ufs_daddr_t daddr;
101351860Sbostic 	size_t size;
101451860Sbostic {
101552085Sbostic 	struct buf *bp;
101655940Sbostic 	size_t nbytes;
101751342Sbostic 
101855940Sbostic 	nbytes = roundup(size, DEV_BSIZE);
101957072Smargo 	bp = malloc(sizeof(struct buf), M_SEGMENT, M_WAITOK);
102057072Smargo 	bzero(bp, sizeof(struct buf));
102157072Smargo 	if (nbytes)
102264526Sbostic 		bp->b_data = malloc(nbytes, M_SEGMENT, M_WAITOK);
102355940Sbostic 	bgetvp(vp, bp);
102455940Sbostic 	bp->b_bufsize = size;
102555940Sbostic 	bp->b_bcount = size;
102651860Sbostic 	bp->b_lblkno = daddr;
102751860Sbostic 	bp->b_blkno = daddr;
102851860Sbostic 	bp->b_error = 0;
102951860Sbostic 	bp->b_resid = 0;
103055940Sbostic 	bp->b_iodone = lfs_callback;
103156027Sbostic 	bp->b_flags |= B_BUSY | B_CALL | B_NOCACHE;
103251860Sbostic 	return (bp);
103351860Sbostic }
103451342Sbostic 
103553347Sbostic void
lfs_callback(bp)103651860Sbostic lfs_callback(bp)
103752085Sbostic 	struct buf *bp;
103851860Sbostic {
103951860Sbostic 	struct lfs *fs;
104051342Sbostic 
104156056Sbostic 	fs = (struct lfs *)bp->b_saveaddr;
104251860Sbostic #ifdef DIAGNOSTIC
104351860Sbostic 	if (fs->lfs_iocount == 0)
104451860Sbostic 		panic("lfs_callback: zero iocount\n");
104551860Sbostic #endif
104651860Sbostic 	if (--fs->lfs_iocount == 0)
104752688Sbostic 		wakeup(&fs->lfs_iocount);
104851915Sbostic 
104955940Sbostic 	brelvp(bp);
105064526Sbostic 	free(bp->b_data, M_SEGMENT);
105155940Sbostic 	free(bp, M_SEGMENT);
105251860Sbostic }
105351342Sbostic 
105455940Sbostic void
lfs_supercallback(bp)105555940Sbostic lfs_supercallback(bp)
105655940Sbostic 	struct buf *bp;
105755940Sbostic {
105855940Sbostic 	brelvp(bp);
105964526Sbostic 	free(bp->b_data, M_SEGMENT);
106055940Sbostic 	free(bp, M_SEGMENT);
106155940Sbostic }
106255940Sbostic 
106351215Sbostic /*
106451188Sbostic  * Shellsort (diminishing increment sort) from Data Structures and
106551188Sbostic  * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290;
106651188Sbostic  * see also Knuth Vol. 3, page 84.  The increments are selected from
106751188Sbostic  * formula (8), page 95.  Roughly O(N^3/2).
106851188Sbostic  */
106951188Sbostic /*
107051188Sbostic  * This is our own private copy of shellsort because we want to sort
107151188Sbostic  * two parallel arrays (the array of buffer pointers and the array of
107251188Sbostic  * logical block numbers) simultaneously.  Note that we cast the array
107351188Sbostic  * of logical block numbers to a unsigned in this routine so that the
107451188Sbostic  * negative block numbers (meta data blocks) sort AFTER the data blocks.
107551188Sbostic  */
107652077Sbostic void
lfs_shellsort(bp_array,lb_array,nmemb)107752077Sbostic lfs_shellsort(bp_array, lb_array, nmemb)
107852085Sbostic 	struct buf **bp_array;
107968550Smckusick 	ufs_daddr_t *lb_array;
108051188Sbostic 	register int nmemb;
108151188Sbostic {
108251188Sbostic 	static int __rsshell_increments[] = { 4, 1, 0 };
108351188Sbostic 	register int incr, *incrp, t1, t2;
108452085Sbostic 	struct buf *bp_temp;
108551188Sbostic 	u_long lb_temp;
108651188Sbostic 
108751188Sbostic 	for (incrp = __rsshell_increments; incr = *incrp++;)
108851188Sbostic 		for (t1 = incr; t1 < nmemb; ++t1)
108951188Sbostic 			for (t2 = t1 - incr; t2 >= 0;)
109051188Sbostic 				if (lb_array[t2] > lb_array[t2 + incr]) {
109151188Sbostic 					lb_temp = lb_array[t2];
109251188Sbostic 					lb_array[t2] = lb_array[t2 + incr];
109351188Sbostic 					lb_array[t2 + incr] = lb_temp;
109451188Sbostic 					bp_temp = bp_array[t2];
109551188Sbostic 					bp_array[t2] = bp_array[t2 + incr];
109651188Sbostic 					bp_array[t2 + incr] = bp_temp;
109751188Sbostic 					t2 -= incr;
109851188Sbostic 				} else
109951188Sbostic 					break;
110051188Sbostic }
110155940Sbostic 
110257072Smargo /*
110365242Smckusick  * Check VXLOCK.  Return 1 if the vnode is locked.  Otherwise, vget it.
110457072Smargo  */
lfs_vref(vp)110557072Smargo lfs_vref(vp)
110657072Smargo 	register struct vnode *vp;
110757072Smargo {
110869421Smckusick 	struct proc *p = curproc;	/* XXX */
110957072Smargo 
111069421Smckusick 	if (vp->v_flag & VXLOCK)	/* XXX */
111157072Smargo 		return(1);
111269421Smckusick 	return (vget(vp, 0, p));
111357072Smargo }
111457072Smargo 
1115*69812Smargo /*
1116*69812Smargo  * This is vrele except that we do not want to VOP_INACTIVE this vnode. We
1117*69812Smargo  * inline vrele here to avoid the vn_lock and VOP_INACTIVE call at the end.
1118*69812Smargo  */
111957072Smargo void
lfs_vunref(vp)112057072Smargo lfs_vunref(vp)
112157072Smargo 	register struct vnode *vp;
112257072Smargo {
1123*69812Smargo 	struct proc *p = curproc;				/* XXX */
1124*69812Smargo 	extern struct simplelock vnode_free_list_slock;		/* XXX */
1125*69812Smargo 	extern TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* XXX */
112657072Smargo 
1127*69812Smargo 	simple_lock(&vp->v_interlock);
1128*69812Smargo 	vp->v_usecount--;
1129*69812Smargo 	if (vp->v_usecount > 0) {
1130*69812Smargo 		simple_unlock(&vp->v_interlock);
1131*69812Smargo 		return;
1132*69812Smargo 	}
113357072Smargo 	/*
1134*69812Smargo 	 * insert at tail of LRU list
113557072Smargo 	 */
1136*69812Smargo 	simple_lock(&vnode_free_list_slock);
1137*69812Smargo 	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1138*69812Smargo 	simple_unlock(&vnode_free_list_slock);
1139*69812Smargo 	simple_unlock(&vp->v_interlock);
114057072Smargo }
1141