xref: /netbsd-src/sys/ufs/lfs/lfs.h (revision 7fa608457b817eca6e0977b37f758ae064f3c99c)
1 /*	$NetBSD: lfs.h,v 1.122 2007/10/10 20:42:34 ad Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Konrad E. Schroder <perseant@hhhh.org>.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 /*-
39  * Copyright (c) 1991, 1993
40  *	The Regents of the University of California.  All rights reserved.
41  *
42  * Redistribution and use in source and binary forms, with or without
43  * modification, are permitted provided that the following conditions
44  * are met:
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  * 2. Redistributions in binary form must reproduce the above copyright
48  *    notice, this list of conditions and the following disclaimer in the
49  *    documentation and/or other materials provided with the distribution.
50  * 3. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *	@(#)lfs.h	8.9 (Berkeley) 5/8/95
67  */
68 
69 #ifndef _UFS_LFS_LFS_H_
70 #define _UFS_LFS_LFS_H_
71 
72 #include <sys/rwlock.h>
73 
74 /*
75  * Compile-time options for LFS.
76  */
77 #define LFS_IFIND_RETRIES  16	/* NOTE(review): retry bound; its user is not in this header */
78 #define LFS_LOGLENGTH      1024 /* number of entries in the lfs_log[] debugging log */
79 #define LFS_MAX_ACTIVE	   10	/* Dirty segments before a checkpoint (ckp) is forced */
80 
81 /*
82  * Fixed filesystem layout parameters
83  */
84 #define	LFS_LABELPAD	8192		/* LFS label size (presumably bytes) */
85 #define	LFS_SBPAD	8192		/* LFS superblock size (presumably bytes) */
86 
87 #define	LFS_UNUSED_INUM	0		/* 0: out of band inode number */
88 #define	LFS_IFILE_INUM	1		/* 1: IFILE inode number */
89 					/* 2: Root inode number (no macro defined here) */
90 #define	LOSTFOUNDINO	3		/* 3: lost+found inode number */
91 #define	LFS_FIRST_INUM	4		/* 4: first free inode number */
92 
93 #define	LFS_V1_SUMMARY_SIZE	512     /* V1 fixed summary size */
94 #define	LFS_DFL_SUMMARY_SIZE	512	/* Default summary size */
95 
96 #define LFS_MAX_DADDR	0x7fffffff	/* Highest addressable fsb (largest positive 32-bit signed value) */
97 
98 #define LFS_MAXNAMLEN	255		/* maximum name length in a dir */
99 
100 /* Adjustable filesystem parameters */
101 #define MIN_FREE_SEGS	20	/* NOTE(review): presumably pairs with dlfs_minfreeseg -- confirm */
102 #define MIN_RESV_SEGS	15	/* NOTE(review): presumably pairs with dlfs_resvseg -- confirm */
103 #ifndef LFS_ATIME_IFILE
104 # define LFS_ATIME_IFILE 0 /* Store atime info in ifile (optional in LFSv1); nonzero adds if_atime to struct ifile_v1 */
105 #endif
106 #define LFS_MARKV_MAXBLKCNT	65536	/* Max block count for lfs_markv() */
107 
108 /* Misc. definitions */
109 #define BW_CLEAN	1		/* Flag for lfs_bwrite_ext() */
110 #define PG_DELWRI	PG_PAGER1	/* Local def for delayed pageout; aliases PG_PAGER1 */
111 
112 /* Resource limits, each computed from a total (x) and a unit size (u) */
113 #define	LFS_MAX_RESOURCE(x, u)	(((x) >> 2) - 10 * (u))	/* x/4 - 10u */
114 #define	LFS_WAIT_RESOURCE(x, u)	(((x) >> 1) - ((x) >> 3) - 10 * (u))	/* x/2 - x/8 - 10u */
115 #define	LFS_INVERSE_MAX_RESOURCE(x, u)	(((x) + 10 * (u)) << 2)	/* (x + 10u) * 4: undoes LFS_MAX_RESOURCE, up to shift truncation */
116 #define LFS_MAX_BUFS	    LFS_MAX_RESOURCE(nbuf, 1)
117 #define LFS_WAIT_BUFS	    LFS_WAIT_RESOURCE(nbuf, 1)	/* compared with locked_queue_count in LFS_UNLOCK_BUF */
118 #define LFS_INVERSE_MAX_BUFS(n)	LFS_INVERSE_MAX_RESOURCE(n, 1)
119 #define LFS_MAX_BYTES	    LFS_MAX_RESOURCE(bufmem_lowater, PAGE_SIZE)
120 #define LFS_INVERSE_MAX_BYTES(n) LFS_INVERSE_MAX_RESOURCE(n, PAGE_SIZE)
121 #define LFS_WAIT_BYTES	    LFS_WAIT_RESOURCE(bufmem_lowater, PAGE_SIZE)	/* compared with locked_queue_bytes in LFS_UNLOCK_BUF */
122 #define LFS_MAX_DIROP	    ((desiredvnodes >> 2) + (desiredvnodes >> 3))	/* desiredvnodes/4 + desiredvnodes/8 */
123 #define SIZEOF_DIROP(fs)	(2 * ((fs)->lfs_bsize + DINODE1_SIZE))	/* twice (one block + DINODE1_SIZE) */
124 #define LFS_MAX_FSDIROP(fs)	/* 0 if nclean <= resvseg, else the spare segments' size over 2 * SIZEOF_DIROP */ \
125 	((fs)->lfs_nclean <= (fs)->lfs_resvseg ? 0 :			\
126 	 (((fs)->lfs_nclean - (fs)->lfs_resvseg) * (fs)->lfs_ssize) /	\
127           (2 * SIZEOF_DIROP(fs)))
128 #define LFS_MAX_PAGES	lfs_max_pages()
129 #define LFS_WAIT_PAGES	lfs_wait_pages()
130 #define LFS_BUFWAIT	    2	/* How long to wait if over *_WAIT_* */
131 
132 #ifdef _KERNEL
133 int lfs_wait_pages(void);	/* value of LFS_WAIT_PAGES */
134 int lfs_max_pages(void);	/* value of LFS_MAX_PAGES */
135 #endif /* _KERNEL */
136 
137 /* How starved can we be before we start holding back page writes: true when lfs_nclean < lfs_resvseg */
138 #define LFS_STARVED_FOR_SEGS(fs) ((fs)->lfs_nclean < (fs)->lfs_resvseg)
139 
140 /*
141  * Reserved blocks for lfs_malloc
142  */
143 
144 /* Structure to keep reserved blocks; entries are chained through `res' */
145 typedef struct lfs_res_blk {
146 	void *p;			/* NOTE(review): presumably the reserved memory itself -- confirm */
147 	LIST_ENTRY(lfs_res_blk) res;	/* list linkage */
148 	int size;			/* NOTE(review): presumably size of *p -- confirm units */
149 	char inuse;			/* NOTE(review): presumably nonzero while handed out -- confirm */
150 } res_t;
151 
152 /* Types for lfs_newbuf and lfs_malloc */
153 #define LFS_NB_UNKNOWN -1
154 #define LFS_NB_SUMMARY	0
155 #define LFS_NB_SBLOCK	1
156 #define LFS_NB_IBLOCK	2
157 #define LFS_NB_CLUSTER	3
158 #define LFS_NB_CLEAN	4
159 #define LFS_NB_BLKIOV	5
160 #define LFS_NB_COUNT	6 /* always last: one past the highest type value above */
161 
162 /* Number of reserved memory blocks of each type */
163 #define LFS_N_SUMMARIES 2
164 #define LFS_N_SBLOCKS	1   /* Always 1, to throttle superblock writes */
165 #define LFS_N_IBLOCKS	16  /* In theory ssize/bsize; in practice around 2 */
166 #define LFS_N_CLUSTERS	16  /* In theory ssize/MAXPHYS */
167 #define LFS_N_CLEAN	0
168 #define LFS_N_BLKIOV	1
169 
170 /* Total count of "large" (non-pool) types; 36 with the values above */
171 #define LFS_N_TOTAL (LFS_N_SUMMARIES + LFS_N_SBLOCKS + LFS_N_IBLOCKS +	\
172 		     LFS_N_CLUSTERS + LFS_N_CLEAN + LFS_N_BLKIOV)
173 
174 /* Counts for pool types */
175 #define LFS_N_CL	LFS_N_CLUSTERS
176 #define LFS_N_BPP	2
177 #define LFS_N_SEG	2
178 
179 /*
180  * "struct buf" associated definitions
181  */
182 
183 /* Unassigned disk addresses: negative sentinel values. */
184 #define	UNASSIGNED	-1
185 #define UNWRITTEN	-2
186 
187 /* Unused logical block number (negative sentinel) */
188 #define LFS_UNUSED_LBN	-1
189 
190 /* Determine if a buffer belongs to the ifile: true when the inode behind bp's vnode is LFS_IFILE_INUM */
191 #define IS_IFILE(bp)	(VTOI((bp)->b_vp)->i_number == LFS_IFILE_INUM)
192 
193 # define LFS_LOCK_BUF(bp) do {	/* set B_LOCKED on bp; bp is evaluated more than once */ \
194 	if (((bp)->b_flags & (B_LOCKED | B_CALL)) == 0) { /* count only if neither flag was set */ \
195 		simple_lock(&lfs_subsys_lock);				\
196 		++locked_queue_count;					\
197 		locked_queue_bytes += (bp)->b_bufsize;			\
198 		simple_unlock(&lfs_subsys_lock);			\
199 	}								\
200 	(bp)->b_flags |= B_LOCKED;					\
201 } while (0)
202 
203 # define LFS_UNLOCK_BUF(bp) do {	/* clear B_LOCKED on bp; bp is evaluated more than once */ \
204 	if (((bp)->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED) { /* uncount only if B_LOCKED without B_CALL */ \
205 		simple_lock(&lfs_subsys_lock);				\
206 		--locked_queue_count;					\
207 		locked_queue_bytes -= (bp)->b_bufsize;			\
208 		if (locked_queue_count < LFS_WAIT_BUFS &&	/* both totals under their wait marks */ \
209 		    locked_queue_bytes < LFS_WAIT_BYTES)		\
210 			wakeup(&locked_queue_count);			\
211 		simple_unlock(&lfs_subsys_lock);			\
212 	}								\
213 	(bp)->b_flags &= ~B_LOCKED;					\
214 } while (0)
215 
216 #ifdef _KERNEL
217 
218 extern u_long bufmem_lowater, bufmem_hiwater; /* XXX: bufmem_lowater feeds LFS_MAX_BYTES and LFS_WAIT_BYTES */
219 
220 # define LFS_IS_MALLOC_BUF(bp) (((bp)->b_flags & B_CALL) &&	/* B_CALL set and completion hook is lfs_callback */ \
221      (bp)->b_iodone == lfs_callback)
222 
223 # ifdef DEBUG
224 #  define LFS_DEBUG_COUNTLOCKED(m) do {	/* only acts when the DLOG_LLIST log subsystem is enabled */ \
225 	if (lfs_debug_log_subsys[DLOG_LLIST]) {				\
226 		lfs_countlocked(&locked_queue_count, &locked_queue_bytes, (m)); \
227 		wakeup(&locked_queue_count);				\
228 	}								\
229 } while (0)
230 # else
231 #  define LFS_DEBUG_COUNTLOCKED(m)	/* expands to nothing without DEBUG */
232 # endif
233 
234 /* log for debugging writes to the Ifile */
235 # ifdef DEBUG
236 struct lfs_log_entry {	/* one record of lfs_log[], filled in by LFS_ENTER_LOG */
237 	const char *op;		/* operation name string */
238 	const char *file;	/* source file of the logging site */
239 	int pid;		/* process id supplied by the caller */
240 	int line;		/* source line of the logging site */
241 	daddr_t block;		/* block number (the lbn argument) */
242 	unsigned long flags;	/* flags supplied by the caller */
243 };
244 extern int lfs_lognum;	/* index of the next lfs_log[] slot to fill */
245 extern struct lfs_log_entry lfs_log[LFS_LOGLENGTH];
246 #  define LFS_BWRITE_LOG(bp) lfs_bwrite_log((bp), __FILE__, __LINE__)	/* DEBUG: passes the call site to lfs_bwrite_log() */
247 #  define LFS_ENTER_LOG(theop, thefile, theline, lbn, theflags, thepid) do {\
248 	int _s;			/* level returned by splbio(), restored below */ \
249 									\
250 	simple_lock(&lfs_subsys_lock);					\
251 	_s = splbio();							\
252 	lfs_log[lfs_lognum].op = theop;					\
253 	lfs_log[lfs_lognum].file = thefile;				\
254 	lfs_log[lfs_lognum].line = (theline);				\
255 	lfs_log[lfs_lognum].pid = (thepid);				\
256 	lfs_log[lfs_lognum].block = (lbn);				\
257 	lfs_log[lfs_lognum].flags = (theflags);				\
258 	lfs_lognum = (lfs_lognum + 1) % LFS_LOGLENGTH;	/* advance, wrapping at LFS_LOGLENGTH */ \
259 	splx(_s);							\
260 	simple_unlock(&lfs_subsys_lock);				\
261 } while (0)
262 
263 #  define LFS_BCLEAN_LOG(fs, bp) do {	/* log a "clear" entry, but only for buffers of fs's ifile vnode */ \
264 	if ((bp)->b_vp == (fs)->lfs_ivnode)				\
265 		LFS_ENTER_LOG("clear", __FILE__, __LINE__,		\
266 			      (bp)->b_lblkno, (bp)->b_flags, curproc->p_pid);\
267 } while (0)
268 
269 /* Must match list in lfs_vfsops.c !  These values index lfs_debug_log_subsys[]. */
270 #  define DLOG_RF     0  /* roll forward */
271 #  define DLOG_ALLOC  1  /* inode alloc */
272 #  define DLOG_AVAIL  2  /* lfs_{,r,f}avail */
273 #  define DLOG_FLUSH  3  /* flush */
274 #  define DLOG_LLIST  4  /* locked list accounting */
275 #  define DLOG_WVNODE 5  /* vflush/writevnodes verbose */
276 #  define DLOG_VNODE  6  /* vflush/writevnodes */
277 #  define DLOG_SEG    7  /* segwrite */
278 #  define DLOG_SU     8  /* seguse accounting */
279 #  define DLOG_CLEAN  9  /* cleaner routines */
280 #  define DLOG_MOUNT  10 /* mount/unmount */
281 #  define DLOG_PAGE   11 /* putpages/gop_write */
282 #  define DLOG_DIROP  12 /* dirop accounting */
283 #  define DLOG_MALLOC 13 /* lfs_malloc accounting */
284 #  define DLOG_MAX    14 /* The terminator */
285 #  define DLOG(a) lfs_debug_log a	/* `a' must be a parenthesized argument list */
286 # else /* ! DEBUG */
287 #  define LFS_BCLEAN_LOG(fs, bp)	/* expands to nothing without DEBUG */
288 #  define LFS_BWRITE_LOG(bp)		VOP_BWRITE((bp))	/* plain VOP_BWRITE, no logging */
289 #  define DLOG(a)	/* expands to nothing without DEBUG */
290 # endif /* ! DEBUG */
291 #else /* ! _KERNEL */
292 # define LFS_BWRITE_LOG(bp)		VOP_BWRITE((bp))	/* non-kernel builds: plain VOP_BWRITE */
293 #endif /* _KERNEL */
294 
295 #ifdef _KERNEL
296 /* Filehandle structure for exported LFSes: a struct ufid followed by an LFS identifier */
297 struct lfid {
298 	struct ufid lfid_ufid;
299 #define lfid_len lfid_ufid.ufid_len	/* shorthands for the embedded ufid's fields */
300 #define lfid_ino lfid_ufid.ufid_ino
301 #define lfid_gen lfid_ufid.ufid_gen
302 	uint32_t lfid_ident;	/* NOTE(review): presumably compared with dlfs_ident -- confirm */
303 };
304 #endif /* _KERNEL */
305 
306 /*
307  * "struct inode" associated definitions
308  */
309 
310 /* Address calculations for metadata located in the inode; all three values are negative. NOTE(review): presumably the lbns of the indirect blocks -- confirm */
311 #define	S_INDIR(fs)	(-NDADDR)	/* fs is unused; parenthesized so it is safe inside any expression */
312 #define	D_INDIR(fs)	(S_INDIR(fs) - NINDIR(fs) - 1)
313 #define	T_INDIR(fs)	(D_INDIR(fs) - NINDIR(fs) * NINDIR(fs) - 1)
314 
315 /* For convenience: union of the seven inode flag bits listed below */
316 #define IN_ALLMOD (IN_MODIFIED|IN_ACCESS|IN_CHANGE|IN_UPDATE|IN_MODIFY|IN_ACCESSED|IN_CLEANING)
317 
318 #define LFS_SET_UINO(ip, flags) do {	/* OR flags into ip->i_flag under lfs_interlock */ \
319 	simple_lock(&(ip)->i_lfs->lfs_interlock);			\
320 	if (((flags) & IN_ACCESSED) && !((ip)->i_flag & IN_ACCESSED))	/* bit newly set: count it */ \
321 		++(ip)->i_lfs->lfs_uinodes;				\
322 	if (((flags) & IN_CLEANING) && !((ip)->i_flag & IN_CLEANING))	\
323 		++(ip)->i_lfs->lfs_uinodes;				\
324 	if (((flags) & IN_MODIFIED) && !((ip)->i_flag & IN_MODIFIED))	\
325 		++(ip)->i_lfs->lfs_uinodes;				\
326 	(ip)->i_flag |= (flags);	/* lfs_uinodes grew by one per newly set bit of the three */ \
327 	simple_unlock(&(ip)->i_lfs->lfs_interlock);			\
328 } while (0)
329 
330 #define LFS_CLR_UINO(ip, flags) do {	/* clear flags in ip->i_flag under lfs_interlock */ \
331 	simple_lock(&(ip)->i_lfs->lfs_interlock);			\
332 	if (((flags) & IN_ACCESSED) && ((ip)->i_flag & IN_ACCESSED))	/* bit was set: uncount it */ \
333 		--(ip)->i_lfs->lfs_uinodes;				\
334 	if (((flags) & IN_CLEANING) && ((ip)->i_flag & IN_CLEANING))	\
335 		--(ip)->i_lfs->lfs_uinodes;				\
336 	if (((flags) & IN_MODIFIED) && ((ip)->i_flag & IN_MODIFIED))	\
337 		--(ip)->i_lfs->lfs_uinodes;				\
338 	(ip)->i_flag &= ~(flags);					\
339 	if ((ip)->i_lfs->lfs_uinodes < 0) {	/* counter underflow; panics with the interlock still held */ \
340 		panic("lfs_uinodes < 0");				\
341 	}								\
342 	simple_unlock(&(ip)->i_lfs->lfs_interlock);			\
343 } while (0)
344 
345 #define LFS_ITIMES(ip, acc, mod, cre) /* loops while any of the four flags is set; NOTE(review): relies on lfs_itimes() clearing them -- confirm */ \
346 	while ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY)) \
347 		lfs_itimes(ip, acc, mod, cre)
348 
349 /*
350  * "struct vnode" associated definitions
351  */
352 
353 /* Heuristic emptiness measure: no dirty buffers, not a VREG vnode with VI_ONWORKLST set, and i_lfs_nbtree == 0; vp is evaluated several times */
354 #define VPISEMPTY(vp)	 (LIST_EMPTY(&(vp)->v_dirtyblkhd) && 		\
355 			  !((vp)->v_type == VREG && (vp)->v_iflag & VI_ONWORKLST) &&\
356 			  VTOI(vp)->i_lfs_nbtree == 0)
357 
358 #define WRITEINPROG(vp) ((vp)->v_numoutput > 0 ||	/* output pending, or dirty buffers while none of the three inode flags is set */ \
359 	(!LIST_EMPTY(&(vp)->v_dirtyblkhd) &&				\
360 	 !(VTOI(vp)->i_flag & (IN_MODIFIED | IN_ACCESSED | IN_CLEANING))))
361 
362 
363 /*
364  * On-disk and in-memory checkpoint segment usage structure.  The number in each field comment is its byte offset.
365  */
366 typedef struct segusage SEGUSE;
367 struct segusage {
368 	u_int32_t su_nbytes;		/* 0: number of live bytes */
369 	u_int32_t su_olastmod;		/* 4: SEGUSE last modified timestamp (same offset as su_lastmod in segusage_v1) */
370 	u_int16_t su_nsums;		/* 8: number of summaries in segment */
371 	u_int16_t su_ninos;		/* 10: number of inode blocks in seg */
372 
373 #define	SEGUSE_ACTIVE		0x01	/*  segment currently being written */
374 #define	SEGUSE_DIRTY		0x02	/*  segment has data in it */
375 #define	SEGUSE_SUPERBLOCK	0x04	/*  segment contains a superblock */
376 #define SEGUSE_ERROR		0x08	/*  cleaner: do not clean segment */
377 #define SEGUSE_EMPTY		0x10	/*  segment is empty */
378 #define SEGUSE_INVAL		0x20	/*  segment is invalid */
379 	u_int32_t su_flags;		/* 12: segment flags (SEGUSE_* above) */
380 	u_int64_t su_lastmod;		/* 16: last modified timestamp */
381 };
382 
383 typedef struct segusage_v1 SEGUSE_V1;	/* version-1 form: struct segusage without the trailing 64-bit su_lastmod */
384 struct segusage_v1 {
385 	u_int32_t su_nbytes;		/* 0: number of live bytes */
386 	u_int32_t su_lastmod;		/* 4: SEGUSE last modified timestamp */
387 	u_int16_t su_nsums;		/* 8: number of summaries in segment */
388 	u_int16_t su_ninos;		/* 10: number of inode blocks in seg */
389 	u_int32_t su_flags;		/* 12: segment flags  */
390 };
391 
392 #define	SEGUPB(fs)	((fs)->lfs_sepb)	/* SEGUSE entries per block */
393 #define	SEGTABSIZE_SU(fs)	/* lfs_nseg divided by entries per block, rounded up */ \
394 	(((fs)->lfs_nseg + SEGUPB(fs) - 1) / (fs)->lfs_sepb)
395 
396 #ifdef _KERNEL
397 # define SHARE_IFLOCK(F) 	/* lockmgr LK_SHARED on lfs_iflock, bracketed by lfs_interlock */ \
398   do {									\
399 	simple_lock(&(F)->lfs_interlock);				\
400 	lockmgr(&(F)->lfs_iflock, LK_SHARED, &(F)->lfs_interlock);	\
401 	simple_unlock(&(F)->lfs_interlock);				\
402   } while(0)
403 # define UNSHARE_IFLOCK(F)	/* lockmgr LK_RELEASE on lfs_iflock, bracketed by lfs_interlock */ \
404   do {									\
405 	simple_lock(&(F)->lfs_interlock);				\
406 	lockmgr(&(F)->lfs_iflock, LK_RELEASE, &(F)->lfs_interlock);	\
407 	simple_unlock(&(F)->lfs_interlock);				\
408   } while(0)
409 #else /* ! _KERNEL */
410 # define SHARE_IFLOCK(F)	/* expands to nothing outside the kernel */
411 # define UNSHARE_IFLOCK(F)	/* expands to nothing outside the kernel */
412 #endif /* ! _KERNEL */
413 
414 /* Read in the block with a specific segment usage entry from the ifile.  Sets SP to entry IN inside BP's data; panics if bread() fails; BP is not released here. */
415 #define	LFS_SEGENTRY(SP, F, IN, BP) do {				\
416 	int _e;			/* bread() result */			\
417 	SHARE_IFLOCK(F);						\
418 	VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS;			\
419 	if ((_e = bread((F)->lfs_ivnode,				\
420 	    ((IN) / (F)->lfs_sepb) + (F)->lfs_cleansz,	/* segment table follows lfs_cleansz blocks */ \
421 	    (F)->lfs_bsize, NOCRED, &(BP))) != 0)			\
422 		panic("lfs: ifile read: %d", _e);			\
423 	if ((F)->lfs_version == 1)	/* v1 entries are the shorter SEGUSE_V1 */ \
424 		(SP) = (SEGUSE *)((SEGUSE_V1 *)(BP)->b_data +		\
425 			((IN) & ((F)->lfs_sepb - 1)));	/* mask equals IN % lfs_sepb only if lfs_sepb is a power of two */ \
426 	else								\
427 		(SP) = (SEGUSE *)(BP)->b_data + ((IN) % (F)->lfs_sepb);	\
428 	UNSHARE_IFLOCK(F);						\
429 } while (0)
430 
431 #define LFS_WRITESEGENTRY(SP, F, IN, BP) do {	/* fix up SEGUSE_EMPTY, mirror the flags, write BP */ \
432 	if ((SP)->su_nbytes == 0)	/* SEGUSE_EMPTY tracks su_nbytes == 0 */ \
433 		(SP)->su_flags |= SEGUSE_EMPTY;				\
434 	else								\
435 		(SP)->su_flags &= ~SEGUSE_EMPTY;			\
436 	(F)->lfs_suflags[(F)->lfs_activesb][(IN)] = (SP)->su_flags;	\
437 	LFS_BWRITE_LOG(BP);						\
438 } while (0)
439 
440 /*
441  * On-disk file information.  One per file with data blocks in the segment.
442  */
443 typedef struct finfo FINFO;
444 struct finfo {
445 	u_int32_t fi_nblocks;		/* number of blocks */
446 	u_int32_t fi_version;		/* version number */
447 	u_int32_t fi_ino;		/* inode number */
448 	u_int32_t fi_lastlength;	/* length of last block in array */
449 	int32_t	  fi_blocks[1];		/* array of logical block numbers; declared with one element, see FINFOSIZE */
450 };
451 /* sizeof FINFO except fi_blocks, i.e. minus the one declared int32_t element */
452 #define	FINFOSIZE	(sizeof(FINFO) - sizeof(int32_t))
453 
454 /*
455  * Index file inode entries.  LFS_IENTRY returns a pointer to one of these.
456  */
457 typedef struct ifile IFILE;
458 struct ifile {
459 	u_int32_t if_version;		/* inode version number */
460 #define	LFS_UNUSED_DADDR	0	/* out-of-band daddr */
461 	int32_t	  if_daddr;		/* inode disk address */
462 #define LFS_ORPHAN_NEXTFREE	(~(u_int32_t)0) /* indicate orphaned file (all-ones if_nextfree) */
463 	u_int32_t if_nextfree;		/* next-unallocated inode */
464 	u_int32_t if_atime_sec;		/* Last access time, seconds */
465 	u_int32_t if_atime_nsec;	/* and nanoseconds */
466 };
467 
468 typedef struct ifile_v1 IFILE_V1;	/* version-1 entry: same first three fields as struct ifile */
469 struct ifile_v1 {
470 	u_int32_t if_version;		/* inode version number */
471 	int32_t	  if_daddr;		/* inode disk address */
472 	u_int32_t if_nextfree;		/* next-unallocated inode */
473 #if LFS_ATIME_IFILE
474 	struct timespec if_atime;	/* Last access time; present only when LFS_ATIME_IFILE is nonzero */
475 #endif
476 };
477 
478 /*
479  * LFSv1 compatibility code is not allowed to touch if_atime, since it
480  * may not be mapped!
481  */
482 /* Read in the block with a specific inode from the ifile.  Sets IP to entry IN inside BP's data; panics if bread() fails; BP is not released here. */
483 #define	LFS_IENTRY(IP, F, IN, BP) do {					\
484 	int _e;			/* bread() result */			\
485 	SHARE_IFLOCK(F);						\
486 	VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS;			\
487 	if ((_e = bread((F)->lfs_ivnode,				\
488 	(IN) / (F)->lfs_ifpb + (F)->lfs_cleansz + (F)->lfs_segtabsz,	/* entries follow cleaner info and segment table */ \
489 	(F)->lfs_bsize, NOCRED, &(BP))) != 0)				\
490 		panic("lfs: ifile ino %d read %d", (int)(IN), _e);	\
491 	if ((F)->lfs_version == 1)	/* v1 blocks hold IFILE_V1 entries */ \
492 		(IP) = (IFILE *)((IFILE_V1 *)(BP)->b_data +		\
493 				 (IN) % (F)->lfs_ifpb);			\
494 	else								\
495 		(IP) = (IFILE *)(BP)->b_data + (IN) % (F)->lfs_ifpb;	\
496 	UNSHARE_IFLOCK(F);						\
497 } while (0)
498 
499 /*
500  * Cleaner information structure.  This resides in the ifile and is used
501  * to pass information from the kernel to the cleaner.  LFS_CLEANERINFO reads it from ifile block 0.
502  */
503 typedef struct _cleanerinfo {
504 	u_int32_t clean;		/* number of clean segments */
505 	u_int32_t dirty;		/* number of dirty segments */
506 	int32_t   bfree;		/* disk blocks free */
507 	int32_t	  avail;		/* disk blocks available */
508 	u_int32_t free_head;		/* head of the inode free list */
509 	u_int32_t free_tail;		/* tail of the inode free list */
510 #define LFS_CLEANER_MUST_CLEAN	0x01
511 	u_int32_t flags;		/* status word from the kernel */
512 } CLEANERINFO;
513 
514 #define	CLEANSIZE_SU(fs)	/* sizeof(CLEANERINFO) in blocks, rounded up */ \
515 	((sizeof(CLEANERINFO) + (fs)->lfs_bsize - 1) >> (fs)->lfs_bshift)
516 
517 /* Read in the block with the cleaner info from the ifile.  Sets CP to the start of BP's data; panics if bread() fails; BP is not released here. */
518 #define LFS_CLEANERINFO(CP, F, BP) do {					\
519 	SHARE_IFLOCK(F);						\
520 	VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS;			\
521 	if (bread((F)->lfs_ivnode,					\
522 	    (daddr_t)0, (F)->lfs_bsize, NOCRED, &(BP)))	/* ifile logical block 0 */ \
523 		panic("lfs: ifile read");				\
524 	(CP) = (CLEANERINFO *)(BP)->b_data;				\
525 	UNSHARE_IFLOCK(F);						\
526 } while (0)
527 
528 /*
529  * Synchronize the Ifile cleaner info with current avail and bfree.  Writes bp if w is nonzero or either value is stale; otherwise releases bp.
530  */
531 #define LFS_SYNC_CLEANERINFO(cip, fs, bp, w) do {		 	\
532     simple_lock(&(fs)->lfs_interlock);					\
533     if ((w) || (cip)->bfree != (fs)->lfs_bfree ||		 	\
534 	(cip)->avail != (fs)->lfs_avail - (fs)->lfs_ravail - 		\
535 	(fs)->lfs_favail) {	 					\
536 	(cip)->bfree = (fs)->lfs_bfree;				 	\
537 	(cip)->avail = (fs)->lfs_avail - (fs)->lfs_ravail -		\
538 		(fs)->lfs_favail;				 	\
539 	if (((bp)->b_flags & B_GATHERED) == 0) {	/* mark the ifile dirty unless bp is already B_GATHERED */ \
540 		(fs)->lfs_flags |= LFS_IFDIRTY;			 	\
541 	}								\
542 	simple_unlock(&(fs)->lfs_interlock);	/* interlock is dropped before the write */ \
543 	(void) LFS_BWRITE_LOG(bp); /* Ifile */			 	\
544     } else {							 	\
545 	simple_unlock(&(fs)->lfs_interlock);				\
546 	brelse(bp, 0);						 	\
547     }									\
548 } while (0)
549 
550 /*
551  * Get the head of the inode free list and store it in *FREEP.
552  * Always called with the segment lock held.
553  */
554 #define LFS_GET_HEADFREE(FS, CIP, BP, FREEP) do {			\
555 	if ((FS)->lfs_version > 1) {	/* refresh lfs_freehd from the cleaner info first */ \
556 		LFS_CLEANERINFO((CIP), (FS), (BP));			\
557 		(FS)->lfs_freehd = (CIP)->free_head;			\
558 		brelse(BP, 0);						\
559 	}								\
560 	*(FREEP) = (FS)->lfs_freehd;					\
561 } while (0)
562 
563 #define LFS_PUT_HEADFREE(FS, CIP, BP, VAL) do {	/* set the inode free-list head to VAL (VAL may be evaluated twice) */ \
564 	(FS)->lfs_freehd = (VAL);					\
565 	if ((FS)->lfs_version > 1) {	/* also store it in the cleaner info and write that block */ \
566 		LFS_CLEANERINFO((CIP), (FS), (BP));			\
567 		(CIP)->free_head = (VAL);				\
568 		LFS_BWRITE_LOG(BP);					\
569 		simple_lock(&(FS)->lfs_interlock);	/* lock the FS argument, not a caller variable named fs */ \
570 		(FS)->lfs_flags |= LFS_IFDIRTY;				\
571 		simple_unlock(&(FS)->lfs_interlock);			\
572 	}								\
573 } while (0)
574 
575 #define LFS_GET_TAILFREE(FS, CIP, BP, FREEP) do {	/* store the cleaner info's free_tail in *FREEP */ \
576 	LFS_CLEANERINFO((CIP), (FS), (BP));				\
577 	*(FREEP) = (CIP)->free_tail;					\
578 	brelse(BP, 0);		/* read-only use: release without writing */ \
579 } while (0)
580 
581 #define LFS_PUT_TAILFREE(FS, CIP, BP, VAL) do {	/* set the cleaner info's free_tail to VAL and write the block */ \
582 	LFS_CLEANERINFO((CIP), (FS), (BP));				\
583 	(CIP)->free_tail = (VAL);					\
584 	LFS_BWRITE_LOG(BP);						\
585 	simple_lock(&(FS)->lfs_interlock);	/* lock the FS argument, not a caller variable named fs */ \
586 	(FS)->lfs_flags |= LFS_IFDIRTY;					\
587 	simple_unlock(&(FS)->lfs_interlock);				\
588 } while (0)
589 
590 /*
591  * On-disk segment summary information (version-1 layout).  The number in each field comment is its byte offset.
592  */
593 typedef struct segsum_v1 SEGSUM_V1;
594 struct segsum_v1 {
595 	u_int32_t ss_sumsum;		/* 0: check sum of summary block */
596 	u_int32_t ss_datasum;		/* 4: check sum of data */
597 	u_int32_t ss_magic;		/* 8: segment summary magic number */
598 #define SS_MAGIC	0x061561
599 	int32_t	  ss_next;		/* 12: next segment */
600 	u_int32_t ss_create;		/* 16: creation time stamp */
601 	u_int16_t ss_nfinfo;		/* 20: number of file info structures */
602 	u_int16_t ss_ninos;		/* 22: number of inodes in summary */
603 
604 #define	SS_DIROP	0x01		/* segment begins a dirop */
605 #define	SS_CONT		0x02		/* more partials to finish this write*/
606 #define	SS_CLEAN	0x04		/* written by the cleaner */
607 #define	SS_RFW		0x08		/* written by the roll-forward agent */
608 	u_int16_t ss_flags;		/* 24: used for directory operations (SS_* above) */
609 	u_int16_t ss_pad;		/* 26: extra space */
610 	/* FINFO's and inode daddr's... */
611 };
612 
613 typedef struct segsum SEGSUM;	/* current summary layout; same as segsum_v1 through offset 24 except ss_ident replaces ss_create */
614 struct segsum {
615 	u_int32_t ss_sumsum;		/* 0: check sum of summary block */
616 	u_int32_t ss_datasum;		/* 4: check sum of data */
617 	u_int32_t ss_magic;		/* 8: segment summary magic number */
618 	int32_t	  ss_next;		/* 12: next segment */
619 	u_int32_t ss_ident;		/* 16: roll-forward fsid */
620 #define ss_ocreate ss_ident /* ident is where create was in v1 */
621 	u_int16_t ss_nfinfo;		/* 20: number of file info structures */
622 	u_int16_t ss_ninos;		/* 22: number of inodes in summary */
623 	u_int16_t ss_flags;		/* 24: used for directory operations */
624 	u_int8_t  ss_pad[6];		/* 26: extra space */
625 	u_int64_t ss_serial;		/* 32: serial number */
626 	u_int64_t ss_create;		/* 40: time stamp */
627 	/* FINFO's and inode daddr's... */
628 };
629 
630 #define SEGSUM_SIZE(fs) ((fs)->lfs_version == 1 ? sizeof(SEGSUM_V1) : sizeof(SEGSUM))	/* header size for fs's format version */
631 
632 
633 /*
634  * On-disk super block.  The number in each field comment is its byte offset; the offsets end at 512 bytes (checksum at 508), so fields must not be reordered or resized.
635  */
636 struct dlfs {
637 #define	       LFS_MAGIC       0x070162
638 	u_int32_t dlfs_magic;	  /* 0: magic number */
639 #define	       LFS_VERSION     2
640 	u_int32_t dlfs_version;	  /* 4: version number */
641 
642 	u_int32_t dlfs_size;	  /* 8: number of blocks in fs (v1) */
643 				  /*	number of frags in fs (v2) */
644 	u_int32_t dlfs_ssize;	  /* 12: number of blocks per segment (v1) */
645 				  /*	 number of bytes per segment (v2) */
646 	u_int32_t dlfs_dsize;	  /* 16: number of disk blocks in fs */
647 	u_int32_t dlfs_bsize;	  /* 20: file system block size */
648 	u_int32_t dlfs_fsize;	  /* 24: size of frag blocks in fs */
649 	u_int32_t dlfs_frag;	  /* 28: number of frags in a block in fs */
650 
651 /* Checkpoint region. */
652 	u_int32_t dlfs_freehd;	  /* 32: start of the free list */
653 	int32_t   dlfs_bfree;	  /* 36: number of free disk blocks */
654 	u_int32_t dlfs_nfiles;	  /* 40: number of allocated inodes */
655 	int32_t	  dlfs_avail;	  /* 44: blocks available for writing */
656 	int32_t	  dlfs_uinodes;	  /* 48: inodes in cache not yet on disk */
657 	int32_t	  dlfs_idaddr;	  /* 52: inode file disk address */
658 	u_int32_t dlfs_ifile;	  /* 56: inode file inode number */
659 	int32_t	  dlfs_lastseg;	  /* 60: address of last segment written */
660 	int32_t	  dlfs_nextseg;	  /* 64: address of next segment to write */
661 	int32_t	  dlfs_curseg;	  /* 68: current segment being written */
662 	int32_t	  dlfs_offset;	  /* 72: offset in curseg for next partial */
663 	int32_t	  dlfs_lastpseg;  /* 76: address of last partial written */
664 	u_int32_t dlfs_inopf;	  /* 80: v1: time stamp; v2: inodes per frag */
665 #define dlfs_otstamp dlfs_inopf
666 
667 /* These are configuration parameters. */
668 	u_int32_t dlfs_minfree;	  /* 84: minimum percentage of free blocks */
669 
670 /* These fields can be computed from the others. */
671 	u_int64_t dlfs_maxfilesize; /* 88: maximum representable file size */
672 	u_int32_t dlfs_fsbpseg;	    /* 96: fsb per segment */
673 	u_int32_t dlfs_inopb;	  /* 100: inodes per block */
674 	u_int32_t dlfs_ifpb;	  /* 104: IFILE entries per block */
675 	u_int32_t dlfs_sepb;	  /* 108: SEGUSE entries per block */
676 	u_int32_t dlfs_nindir;	  /* 112: indirect pointers per block */
677 	u_int32_t dlfs_nseg;	  /* 116: number of segments */
678 	u_int32_t dlfs_nspf;	  /* 120: number of sectors per fragment */
679 	u_int32_t dlfs_cleansz;	  /* 124: cleaner info size in blocks */
680 	u_int32_t dlfs_segtabsz;  /* 128: segment table size in blocks */
681 	u_int32_t dlfs_segmask;	  /* 132: calculate offset within a segment */
682 	u_int32_t dlfs_segshift;  /* 136: fast mult/div for segments */
683 	u_int32_t dlfs_bshift;	  /* 140: calc block number from file offset */
684 	u_int32_t dlfs_ffshift;	  /* 144: fast mult/div for frag from file */
685 	u_int32_t dlfs_fbshift;	  /* 148: fast mult/div for frag from block */
686 	u_int64_t dlfs_bmask;	  /* 152: calc block offset from file offset */
687 	u_int64_t dlfs_ffmask;	  /* 160: calc frag offset from file offset */
688 	u_int64_t dlfs_fbmask;	  /* 168: calc frag offset from block offset */
689 	u_int32_t dlfs_blktodb;	  /* 176: blktodb and dbtoblk shift constant */
690 	u_int32_t dlfs_sushift;	  /* 180: fast mult/div for segusage table */
691 
692 	int32_t	  dlfs_maxsymlinklen; /* 184: max length of an internal symlink */
693 #define LFS_MIN_SBINTERVAL     5  /* minimum superblock segment spacing */
694 #define LFS_MAXNUMSB	       10 /* 188: superblock disk offsets */
695 	int32_t	   dlfs_sboffs[LFS_MAXNUMSB];
696 
697 	u_int32_t dlfs_nclean;	  /* 228: Number of clean segments */
698 	u_char	  dlfs_fsmnt[MNAMELEN];	 /* 232: name mounted on (the next offset implies MNAMELEN == 90) */
699 #define LFS_PF_CLEAN 0x1
700 	u_int16_t dlfs_pflags;	  /* 322: file system persistent flags */
701 	int32_t	  dlfs_dmeta;	  /* 324: total number of dirty summaries */
702 	u_int32_t dlfs_minfreeseg; /* 328: segments not counted in bfree */
703 	u_int32_t dlfs_sumsize;	  /* 332: size of summary blocks */
704 	u_int64_t dlfs_serial;	  /* 336: serial number */
705 	u_int32_t dlfs_ibsize;	  /* 344: size of inode blocks */
706 	int32_t	  dlfs_start;	  /* 348: start of segment 0 */
707 	u_int64_t dlfs_tstamp;	  /* 352: time stamp */
708 #define LFS_44INODEFMT 0
709 #define LFS_MAXINODEFMT 0
710 	u_int32_t dlfs_inodefmt;  /* 360: inode format version */
711 	u_int32_t dlfs_interleave; /* 364: segment interleave */
712 	u_int32_t dlfs_ident;	  /* 368: per-fs identifier */
713 	u_int32_t dlfs_fsbtodb;	  /* 372: fsbtodb and dbtofsb shift constant */
714 	u_int32_t dlfs_resvseg;   /* 376: segments reserved for the cleaner */
715 	int8_t	  dlfs_pad[128];  /* 380: round to 512 bytes */
716 /* Checksum -- last valid disk field. */
717 	u_int32_t dlfs_cksum;	  /* 508: checksum for superblock checking */
718 };
719 
720 /* Type used for the inode bitmap: one 32-bit unsigned word */
721 typedef u_int32_t lfs_bm_t;
722 
723 /*
724  * Linked list of segments whose byte count needs updating following a
725  * file truncation.
726  */
727 struct segdelta {
728 	long segnum;			/* segment number this record applies to */
729 	size_t num;			/* NOTE(review): presumably the byte-count change for that segment -- confirm */
730 	LIST_ENTRY(segdelta) list;	/* list linkage */
731 };
732 
733 /*
734  * In-memory super block: the on-disk struct dlfs plus mount-time-only state.
735  */
736 struct lfs {
737 	struct dlfs lfs_dlfs;		/* on-disk parameters */
738 #define lfs_magic lfs_dlfs.dlfs_magic
739 #define lfs_version lfs_dlfs.dlfs_version
740 #define lfs_size lfs_dlfs.dlfs_size
741 #define lfs_ssize lfs_dlfs.dlfs_ssize
742 #define lfs_dsize lfs_dlfs.dlfs_dsize
743 #define lfs_bsize lfs_dlfs.dlfs_bsize
744 #define lfs_fsize lfs_dlfs.dlfs_fsize
745 #define lfs_frag lfs_dlfs.dlfs_frag
746 #define lfs_freehd lfs_dlfs.dlfs_freehd
747 #define lfs_bfree lfs_dlfs.dlfs_bfree
748 #define lfs_nfiles lfs_dlfs.dlfs_nfiles
749 #define lfs_avail lfs_dlfs.dlfs_avail
750 #define lfs_uinodes lfs_dlfs.dlfs_uinodes
751 #define lfs_idaddr lfs_dlfs.dlfs_idaddr
752 #define lfs_ifile lfs_dlfs.dlfs_ifile
753 #define lfs_lastseg lfs_dlfs.dlfs_lastseg
754 #define lfs_nextseg lfs_dlfs.dlfs_nextseg
755 #define lfs_curseg lfs_dlfs.dlfs_curseg
756 #define lfs_offset lfs_dlfs.dlfs_offset
757 #define lfs_lastpseg lfs_dlfs.dlfs_lastpseg
758 #define lfs_otstamp lfs_dlfs.dlfs_inopf	/* NOTE(review): same field as lfs_inopf; confirm intended */
759 #define lfs_inopf lfs_dlfs.dlfs_inopf
760 #define lfs_minfree lfs_dlfs.dlfs_minfree
761 #define lfs_maxfilesize lfs_dlfs.dlfs_maxfilesize
762 #define lfs_fsbpseg lfs_dlfs.dlfs_fsbpseg
763 #define lfs_inopb lfs_dlfs.dlfs_inopb
764 #define lfs_ifpb lfs_dlfs.dlfs_ifpb
765 #define lfs_sepb lfs_dlfs.dlfs_sepb
766 #define lfs_nindir lfs_dlfs.dlfs_nindir
767 #define lfs_nseg lfs_dlfs.dlfs_nseg
768 #define lfs_nspf lfs_dlfs.dlfs_nspf
769 #define lfs_cleansz lfs_dlfs.dlfs_cleansz
770 #define lfs_segtabsz lfs_dlfs.dlfs_segtabsz
771 #define lfs_segmask lfs_dlfs.dlfs_segmask
772 #define lfs_segshift lfs_dlfs.dlfs_segshift
773 #define lfs_bmask lfs_dlfs.dlfs_bmask
774 #define lfs_bshift lfs_dlfs.dlfs_bshift
775 #define lfs_ffmask lfs_dlfs.dlfs_ffmask
776 #define lfs_ffshift lfs_dlfs.dlfs_ffshift
777 #define lfs_fbmask lfs_dlfs.dlfs_fbmask
778 #define lfs_fbshift lfs_dlfs.dlfs_fbshift
779 #define lfs_blktodb lfs_dlfs.dlfs_blktodb
780 #define lfs_fsbtodb lfs_dlfs.dlfs_fsbtodb
781 #define lfs_sushift lfs_dlfs.dlfs_sushift
782 #define lfs_maxsymlinklen lfs_dlfs.dlfs_maxsymlinklen
783 #define lfs_sboffs lfs_dlfs.dlfs_sboffs
784 #define lfs_cksum lfs_dlfs.dlfs_cksum
785 #define lfs_pflags lfs_dlfs.dlfs_pflags
786 #define lfs_fsmnt lfs_dlfs.dlfs_fsmnt
787 #define lfs_nclean lfs_dlfs.dlfs_nclean
788 #define lfs_dmeta lfs_dlfs.dlfs_dmeta
789 #define lfs_minfreeseg lfs_dlfs.dlfs_minfreeseg
790 #define lfs_sumsize lfs_dlfs.dlfs_sumsize
791 #define lfs_serial lfs_dlfs.dlfs_serial
792 #define lfs_ibsize lfs_dlfs.dlfs_ibsize
793 #define lfs_start lfs_dlfs.dlfs_start
794 #define lfs_tstamp lfs_dlfs.dlfs_tstamp
795 #define lfs_inodefmt lfs_dlfs.dlfs_inodefmt
796 #define lfs_interleave lfs_dlfs.dlfs_interleave
797 #define lfs_ident lfs_dlfs.dlfs_ident
798 #define lfs_resvseg lfs_dlfs.dlfs_resvseg
799 
800 /* These fields are set at mount time and are meaningless on disk. */
801 	struct segment *lfs_sp;		/* current segment being written */
802 	struct vnode *lfs_ivnode;	/* vnode for the ifile */
803 	u_int32_t  lfs_seglock;		/* single-thread the segment writer */
804 	pid_t	  lfs_lockpid;		/* pid of lock holder */
805 	lwpid_t	  lfs_locklwp;		/* lwp of lock holder */
806 	u_int32_t lfs_iocount;		/* number of ios pending */
807 	u_int32_t lfs_writer;		/* don't allow any dirops to start */
808 	u_int32_t lfs_dirops;		/* count of active directory ops */
809 	u_int32_t lfs_dirvcount;	/* count of VDIROP nodes in this fs */
810 	u_int32_t lfs_doifile;		/* Write ifile blocks on next write */
811 	u_int32_t lfs_nactive;		/* Number of segments since last ckp */
812 	int8_t	  lfs_fmod;		/* super block modified flag */
813 	int8_t	  lfs_ronly;		/* mounted read-only flag */
814 #define LFS_NOTYET  0x01
815 #define LFS_IFDIRTY 0x02
816 #define LFS_WARNED  0x04
817 #define LFS_UNDIROP 0x08
818 	int8_t	  lfs_flags;		/* flags, presumably the LFS_* bits above (old note said: currently unused) */
819 	u_int16_t lfs_activesb;		/* toggle between superblocks */
820 	daddr_t	  lfs_sbactive;		/* disk address of current sb write */
821 	struct vnode *lfs_flushvp;	/* vnode being flushed */
822 	int lfs_flushvp_fakevref;	/* fake vref count for flushvp */
823 	struct vnode *lfs_unlockvp;	/* being inactivated in lfs_segunlock */
824 	u_int32_t lfs_diropwait;	/* # procs waiting on dirop flush */
825 	size_t lfs_devbsize;		/* Device block size */
826 	size_t lfs_devbshift;		/* Device block shift */
827 	krwlock_t lfs_fraglock;
828 	struct lock lfs_iflock;		/* Ifile lock */
829 	struct lock lfs_stoplock;	/* Wrap lock */
830 	pid_t lfs_rfpid;		/* Process ID of roll-forward agent */
831 	int	  lfs_nadirop;		/* number of active dirop nodes */
832 	long	  lfs_ravail;		/* blocks pre-reserved for writing */
833 	long	  lfs_favail;		/* blocks pre-reserved for writing (NOTE(review): same text as lfs_ravail) */
834 	res_t *lfs_resblk;		/* Reserved memory for pageout */
835 	TAILQ_HEAD(, inode) lfs_dchainhd; /* dirop vnodes */
836 	TAILQ_HEAD(, inode) lfs_pchainhd; /* paging vnodes */
837 #define LFS_RESHASH_WIDTH 17
838 	LIST_HEAD(, lfs_res_blk) lfs_reshash[LFS_RESHASH_WIDTH];
839 	int	  lfs_pdflush;		 /* pagedaemon wants us to flush */
840 	u_int32_t **lfs_suflags;	/* Segment use flags */
841 #ifdef _KERNEL
842 	struct pool lfs_clpool;		/* Pool for struct lfs_cluster */
843 	struct pool lfs_bpppool;	/* Pool for bpp */
844 	struct pool lfs_segpool;	/* Pool for struct segment */
845 #endif /* _KERNEL */
846 #define LFS_MAX_CLEANIND 64
847 	int32_t  lfs_cleanint[LFS_MAX_CLEANIND]; /* Active cleaning intervals */
848 	int 	 lfs_cleanind;	/* Index into intervals */
849 	struct simplelock lfs_interlock;  /* lock for lfs_seglock */
850 	int lfs_sleepers;		/* # procs sleeping this fs */
851 	int lfs_pages;			/* dirty pages blaming this fs */
852 	lfs_bm_t *lfs_ino_bitmap;	/* Inuse inodes bitmap */
853 	int lfs_nowrap;			/* Suspend log wrap */
854 	int lfs_wrappass;		/* Allow first log wrap requester to pass */
855 	int lfs_wrapstatus;		/* Wrap status */
856 	LIST_HEAD(, segdelta) lfs_segdhd;	/* List of pending trunc accounting events */
857 };
858 
859 /* NINDIR is the number of indirects in a file system block (lfs_nindir). */
860 #define	NINDIR(fs)	((fs)->lfs_nindir)
861 
862 /* INOPB is the number of inodes in a secondary storage block (lfs_inopb). */
863 #define	INOPB(fs)	((fs)->lfs_inopb)
864 /* INOPF is the number of inodes in a fragment (lfs_inopf). */
865 #define INOPF(fs)	((fs)->lfs_inopf)
866 
867 #define	blksize(fs, ip, lbn)	/* size of block lbn of ip: lfs_bsize, or the frag-rounded blkoff of the file size */ \
868 	(((lbn) >= NDADDR || (ip)->i_ffs1_size >= ((lbn) + 1) << (fs)->lfs_bshift) \
869 	    ? (fs)->lfs_bsize \
870 	    : (fragroundup(fs, blkoff(fs, (ip)->i_ffs1_size))))
871 #define	blkoff(fs, loc)		((int)((loc) & (fs)->lfs_bmask))	/* loc masked with lfs_bmask */
872 #define fragoff(fs, loc)    /* calculates (loc % fs->lfs_fsize) */ \
873     ((int)((loc) & (fs)->lfs_ffmask))
874 #define	fsbtodb(fs, b)		((b) << (fs)->lfs_fsbtodb)
875 #define	dbtofsb(fs, b)		((b) >> (fs)->lfs_fsbtodb)
876 #define fragstodb(fs, b)	((b) << ((fs)->lfs_blktodb - (fs)->lfs_fbshift))
877 #define dbtofrags(fs, b)	((b) >> ((fs)->lfs_blktodb - (fs)->lfs_fbshift))
878 #define	lblkno(fs, loc)		((loc) >> (fs)->lfs_bshift)
879 #define	lblktosize(fs, blk)	((blk) << (fs)->lfs_bshift)
880 /* Same as above, but named like dbtob(), btodb() */
881 #define fsbtob(fs, b)		((b) << ((fs)->lfs_bshift - \
882 				(fs)->lfs_blktodb + (fs)->lfs_fsbtodb))
883 #define btofsb(fs, b)		((b) >> ((fs)->lfs_bshift - \
884 				(fs)->lfs_blktodb + (fs)->lfs_fsbtodb))
885 #define fsbtofrags(fs, b)	((b) >> ((fs)->lfs_blktodb - (fs)->lfs_fbshift - \
886 				(fs)->lfs_fsbtodb))
887 #define fragstofsb(fs, b)	((b) << ((fs)->lfs_blktodb - (fs)->lfs_fbshift - \
888 				(fs)->lfs_fsbtodb))
889 #define btofrags(fs, b)		((b) >> (fs)->lfs_ffshift)
890 #define numfrags(fs, loc)	/* calculates (loc / fs->lfs_fsize) */	\
891 	((loc) >> (fs)->lfs_ffshift)
892 #define blkroundup(fs, size)	/* calculates roundup(size, fs->lfs_bsize) */ \
893 	((off_t)(((size) + (fs)->lfs_bmask) & (~(fs)->lfs_bmask)))
894 #define fragroundup(fs, size)	/* calculates roundup(size, fs->lfs_fsize) */ \
895 	((off_t)(((size) + (fs)->lfs_ffmask) & (~(fs)->lfs_ffmask)))
896 #define fragstoblks(fs, frags)	/* calculates (frags / fs->lfs_frag) */	\
897 	((frags) >> (fs)->lfs_fbshift)
898 #define blkstofrags(fs, blks)	/* calculates (blks * fs->lfs_frag) */	\
899 	((blks) << (fs)->lfs_fbshift)
900 #define fragnum(fs, fsb)	/* calculates (fsb % fs->lfs_frag) */	\
901 	((fsb) & ((fs)->lfs_frag - 1))
902 #define blknum(fs, fsb)		/* calculates rounddown(fsb, fs->lfs_frag) */ \
903 	((fsb) &~ ((fs)->lfs_frag - 1))
904 #define dblksize(fs, dp, lbn)	/* same test as blksize(), but reads (dp)->di_size */ \
905 	(((lbn) >= NDADDR || (dp)->di_size >= ((lbn) + 1) << (fs)->lfs_bshift)\
906 	    ? (fs)->lfs_bsize \
907 	    : (fragroundup(fs, blkoff(fs, (dp)->di_size))))
908 /* Segment geometry: version 1 scales lfs_ssize (lblktosize() / lfs_blktodb); other versions use it as-is or via btofsb(). */
909 #define	segsize(fs)	((fs)->lfs_version == 1 ?	     		\
910 			   lblktosize((fs), (fs)->lfs_ssize) :		\
911 			   (fs)->lfs_ssize)
912 #define segtod(fs, seg) (((fs)->lfs_version == 1     ?	     		\
913 			   (fs)->lfs_ssize << (fs)->lfs_blktodb :	\
914 			   btofsb((fs), (fs)->lfs_ssize)) * (seg))
915 #define	dtosn(fs, daddr)	/* block address to segment number, counted from lfs_start */	\
916 	((uint32_t)(((daddr) - (fs)->lfs_start) / segtod((fs), 1)))
917 #define sntod(fs, sn)		/* segment number to disk address, offset by lfs_start */	\
918 	((daddr_t)(segtod((fs), (sn)) + (fs)->lfs_start))
919 
920 /*
921  * Structures used by lfs_bmapv and lfs_markv to communicate information
922  * about inodes and data blocks (carried in struct lfs_fcntl_markv.blkiov).
923  */
924 typedef struct block_info {
925 	u_int32_t bi_inode;		/* inode # */
926 	int32_t	bi_lbn;			/* logical block w/in file */
927 	int32_t	bi_daddr;		/* disk address of block */
928 	u_int64_t bi_segcreate;		/* origin segment create time */
929 	int	bi_version;		/* file version number */
930 	void	*bi_bp;			/* data buffer */
931 	int	bi_size;		/* size of the block (if fragment) */
932 } BLOCK_INFO;
933 
934 /* Compatibility for 1.5 binaries: same fields as BLOCK_INFO except a 32-bit bi_segcreate */
935 typedef struct block_info_15 {
936 	u_int32_t bi_inode;		/* inode # */
937 	int32_t	bi_lbn;			/* logical block w/in file */
938 	int32_t	bi_daddr;		/* disk address of block */
939 	u_int32_t bi_segcreate;		/* origin segment create time (u_int64_t in BLOCK_INFO) */
940 	int	bi_version;		/* file version number */
941 	void	*bi_bp;			/* data buffer */
942 	int	bi_size;		/* size of the block (if fragment) */
943 } BLOCK_INFO_15;
944 
945 /* In-memory description of a segment about to be written (see lfs_sp in struct lfs). */
946 struct segment {
947 	struct lfs	 *fs;		/* file system pointer */
948 	struct buf	**bpp;		/* pointer to buffer array */
949 	struct buf	**cbpp;		/* pointer to next available bp */
950 	struct buf	**start_bpp;	/* pointer to first bp in this set */
951 	struct buf	 *ibp;		/* buffer pointer to inode page */
952 	struct ufs1_dinode    *idp;          /* pointer to ifile dinode */
953 	struct finfo	 *fip;		/* current fileinfo pointer */
954 	struct vnode	 *vp;		/* vnode being gathered */
955 	void	 *segsum;		/* segment summary info */
956 	u_int32_t ninodes;		/* number of inodes in this segment */
957 	int32_t seg_bytes_left;		/* bytes left in segment */
958 	int32_t sum_bytes_left;		/* bytes left in summary block */
959 	u_int32_t seg_number;		/* number of this segment */
960 	int32_t *start_lbp;		/* beginning lbn for this set */
961 
962 #define	SEGM_CKP	0x01		/* doing a checkpoint */
963 #define	SEGM_CLEAN	0x02		/* cleaner call; don't sort */
964 #define	SEGM_SYNC	0x04		/* wait for segment */
965 #define	SEGM_PROT	0x08		/* don't inactivate at segunlock */
966 #define SEGM_PAGEDAEMON	0x10		/* pagedaemon called us */
967 #define SEGM_WRITERD	0x20		/* LFS writed called us */
968 #define SEGM_FORCE_CKP	0x40		/* Force checkpoint right away */
969 	u_int16_t seg_flags;		/* run-time flags for this segment (SEGM_* above) */
970 	u_int32_t seg_iocount;		/* number of ios pending */
971 	int	  ndupino;		/* number of duplicate inodes */
972 };
973 
974 #ifdef _KERNEL
975 struct lfs_cluster {	/* kernel-only (inside #ifdef _KERNEL) */
976 	size_t bufsize;	       /* Size of kept data */
977 	struct buf **bpp;      /* Array of kept buffers */
978 	int bufcount;	       /* Number of kept buffers */
979 #define LFS_CL_MALLOC	0x00000001
980 #define LFS_CL_SHIFT	0x00000002
981 #define LFS_CL_SYNC	0x00000004
982 	u_int32_t flags;       /* Flags (presumably the LFS_CL_* bits above) */
983 	struct lfs *fs;	       /* LFS that this belongs to */
984 	struct segment *seg;   /* Segment structure, for LFS_CL_SYNC */
985 };
986 
987 /*
988  * Splay tree containing block numbers allocated through lfs_balloc.
989  */
990 struct lbnentry {
991 	SPLAY_ENTRY(lbnentry) entry;	/* splay tree linkage */
992 	daddr_t lbn;			/* the recorded block number */
993 };
994 #endif /* _KERNEL */
995 
996 /*
997  * LFS inode extensions, reached through inode_ext.lfs by the i_lfs_* macros below.
998  */
999 struct lfs_inode_ext {
1000 	off_t	  lfs_osize;		/* size of file on disk */
1001 	u_int32_t lfs_effnblocks;  /* number of blocks when i/o completes */
1002 	size_t	  lfs_fragsize[NDADDR]; /* size of on-disk direct blocks */
1003 	TAILQ_ENTRY(inode) lfs_dchain;  /* Dirop chain. */
1004 	TAILQ_ENTRY(inode) lfs_pchain;  /* Paging chain. */
1005 #define LFSI_NO_GOP_WRITE 0x01
1006 #define LFSI_DELETED      0x02
1007 #define LFSI_WRAPBLOCK    0x04
1008 #define LFSI_WRAPWAIT     0x08
1009 	u_int32_t lfs_iflags;           /* Inode flags (presumably the LFSI_* bits above) */
1010 	daddr_t   lfs_hiblk;		/* Highest lbn held by inode */
1011 #ifdef _KERNEL
1012 	SPLAY_HEAD(lfs_splay, lbnentry) lfs_lbtree; /* Tree of balloc'd lbns */
1013 	int	  lfs_nbtree;		/* Size of tree */
1014 	LIST_HEAD(, segdelta) lfs_segdhd;	/* list of struct segdelta entries */
1015 #endif
1016 	int16_t	  lfs_odnlink;		/* on-disk nlink count for cleaner */
1017 };
1018 #define i_lfs_osize		inode_ext.lfs->lfs_osize
1019 #define i_lfs_effnblks		inode_ext.lfs->lfs_effnblocks
1020 #define i_lfs_fragsize		inode_ext.lfs->lfs_fragsize
1021 #define i_lfs_dchain		inode_ext.lfs->lfs_dchain
1022 #define i_lfs_pchain		inode_ext.lfs->lfs_pchain
1023 #define i_lfs_iflags		inode_ext.lfs->lfs_iflags
1024 #define i_lfs_hiblk		inode_ext.lfs->lfs_hiblk
1025 #define i_lfs_lbtree		inode_ext.lfs->lfs_lbtree
1026 #define i_lfs_nbtree		inode_ext.lfs->lfs_nbtree
1027 #define i_lfs_segdhd		inode_ext.lfs->lfs_segdhd
1028 #define i_lfs_odnlink		inode_ext.lfs->lfs_odnlink
1029 
1030 /*
1031  * Macros for determining free space on the disk, with the variable metadata
1032  * of segment summaries and inode blocks taken into account.
1033  */
1034 /* Estimate number of clean blocks not available for writing. NOTE(review): divisor (lfs_nseg - lfs_nclean) is zero if every segment is clean */
1035 #define LFS_EST_CMETA(F) (int32_t)((((F)->lfs_dmeta *			     \
1036 				     (int64_t)(F)->lfs_nclean) /	     \
1037 				      ((F)->lfs_nseg - (F)->lfs_nclean)))
1038 
1039 /* Estimate total size of the disk not including metadata */
1040 #define LFS_EST_NONMETA(F) ((F)->lfs_dsize - (F)->lfs_dmeta - LFS_EST_CMETA(F))
1041 
1042 /* Estimate number of blocks actually available for writing (clamped at 0) */
1043 #define LFS_EST_BFREE(F) ((F)->lfs_bfree > LFS_EST_CMETA(F) ?		     \
1044 			  (F)->lfs_bfree - LFS_EST_CMETA(F) : 0)
1045 
1046 /* Amount of non-meta space not available to mortal man: lfs_minfree percent of LFS_EST_NONMETA */
1047 #define LFS_EST_RSVD(F) (int32_t)((LFS_EST_NONMETA(F) *			     \
1048 				   (u_int64_t)(F)->lfs_minfree) /	     \
1049 				  100)
1050 
1051 /* Can credential C write BB blocks?  NOCRED and euid 0 only need LFS_EST_BFREE; anyone else must pass
1052  * IS_FREESPACE.  NOCRED is tested first so it is never handed to kauth_cred_geteuid(). */
1053 #define ISSPACE(F, BB, C)						\
1054 	(((C) == NOCRED || kauth_cred_geteuid(C) == 0)			\
1055 	    ? (LFS_EST_BFREE(F) >= (BB)) : IS_FREESPACE(F, BB))
1056 
1057 /* Can an ordinary user write BB blocks: the free estimate must cover BB plus LFS_EST_RSVD */
1058 #define IS_FREESPACE(F, BB)						\
1059 	  (LFS_EST_BFREE(F) >= (BB) + LFS_EST_RSVD(F))
1060 
1061 /*
1062  * The minimum number of blocks to create a new inode.  This is:
1063  * directory direct block (1) + NIADDR indirect blocks + inode block (1) +
1064  * ifile direct block (1) + NIADDR indirect blocks = 3 + 2 * NIADDR blocks.
1065  * The block count is scaled by lfs_bshift and passed through btofsb().
1066 #define LFS_NRESERVE(F) (btofsb((F), (2 * NIADDR + 3) << (F)->lfs_bshift))
1067 
1068 /* Statistics counters; the kernel declares one extern instance named lfs_stats */
1069 struct lfs_stats {	/* Must match sysctl list in lfs_vfsops.h ! */
1070 	u_int	segsused;
1071 	u_int	psegwrites;
1072 	u_int	psyncwrites;
1073 	u_int	pcleanwrites;
1074 	u_int	blocktot;
1075 	u_int	cleanblocks;
1076 	u_int	ncheckpoints;
1077 	u_int	nwrites;
1078 	u_int	nsync_writes;
1079 	u_int	wait_exceeded;
1080 	u_int	write_exceeded;
1081 	u_int	flush_invoked;
1082 	u_int	vflush_invoked;
1083 	u_int	clean_inlocked;
1084 	u_int	clean_vnlocked;
1085 	u_int   segs_reclaimed;
1086 };
1087 #ifdef _KERNEL
1088 extern struct lfs_stats lfs_stats;
1089 #endif
1090 
1091 /* Fcntls to take the place of the lfs syscalls; this struct is the argument of LFCNBMAPV and LFCNMARKV */
1092 struct lfs_fcntl_markv {
1093 	BLOCK_INFO *blkiov;	/* blocks to relocate */
1094 	int blkcnt;		/* number of blocks (presumably entries in blkiov) */
1095 };
1096 
1097 #define LFCNSEGWAITALL	 _FCNR_FSPRIV('L', 0, struct timeval)
1098 #define LFCNSEGWAIT	 _FCNR_FSPRIV('L', 1, struct timeval)
1099 #define LFCNBMAPV	_FCNRW_FSPRIV('L', 2, struct lfs_fcntl_markv)
1100 #define LFCNMARKV	_FCNRW_FSPRIV('L', 3, struct lfs_fcntl_markv)
1101 #define LFCNRECLAIM	 _FCNO_FSPRIV('L', 4)
1102 
1103 struct lfs_fhandle {	/* argument type of LFCNIFILEFH */
1104 	char space[28];	/* FHANDLE_SIZE_COMPAT (but used from userland too) */
1105 };
1106 #define LFCNREWIND       _FCNR_FSPRIV('L', 6, int)
1107 #define LFCNINVAL        _FCNR_FSPRIV('L', 7, int)
1108 #define LFCNRESIZE       _FCNR_FSPRIV('L', 8, int)
1109 #define LFCNWRAPSTOP	 _FCNR_FSPRIV('L', 9, int)
1110 #define LFCNWRAPGO	 _FCNR_FSPRIV('L', 10, int)
1111 #define LFCNIFILEFH	 _FCNW_FSPRIV('L', 11, struct lfs_fhandle)
1112 #define LFCNWRAPPASS	 _FCNR_FSPRIV('L', 12, int)
1113 # define LFS_WRAP_GOING   0x0
1114 # define LFS_WRAP_WAITING 0x1
1115 #define LFCNWRAPSTATUS	 _FCNW_FSPRIV('L', 13, int)
1116 /* Compat: alternate encodings of commands defined above; number 5 appears only here (LFCNIFILEFH_COMPAT) */
1117 #define LFCNSEGWAITALL_COMPAT	 _FCNW_FSPRIV('L', 0, struct timeval)
1118 #define LFCNSEGWAIT_COMPAT	 _FCNW_FSPRIV('L', 1, struct timeval)
1119 #define LFCNIFILEFH_COMPAT	 _FCNW_FSPRIV('L', 5, struct lfs_fhandle)
1120 #define LFCNIFILEFH_COMPAT2	 _FCN_FSPRIV(F_FSOUT, 'L', 11, 32)
1121 #define LFCNWRAPSTOP_COMPAT	 _FCNO_FSPRIV('L', 9)
1122 #define LFCNWRAPGO_COMPAT	 _FCNO_FSPRIV('L', 10)
1123 
1124 #ifdef _KERNEL
1125 /* XXX MP: nonzero when lfs_seglock is set and lfs_lockpid/lfs_locklwp match curproc/curlwp */
1126 #define	LFS_SEGLOCK_HELD(fs) \
1127 	((fs)->lfs_seglock != 0 &&					\
1128 	 (fs)->lfs_lockpid == curproc->p_pid &&				\
1129 	 (fs)->lfs_locklwp == curlwp->l_lid)
1130 #endif /* _KERNEL */
1131 
1132 /* Debug segment lock: KASSERTs when notyet is defined, otherwise mismatches are only reported through DLOG */
1133 #ifdef notyet
1134 # define ASSERT_SEGLOCK(fs) KASSERT(LFS_SEGLOCK_HELD(fs))
1135 # define ASSERT_NO_SEGLOCK(fs) KASSERT(!LFS_SEGLOCK_HELD(fs))
1136 # define ASSERT_DUNNO_SEGLOCK(fs)
1137 # define ASSERT_MAYBE_SEGLOCK(fs)
1138 #else /* !notyet */
1139 # define ASSERT_DUNNO_SEGLOCK(fs) /* logs the held state without testing it */ \
1140 	DLOG((DLOG_SEG, "lfs func %s seglock wrong (%d)\n", __func__, \
1141 		LFS_SEGLOCK_HELD(fs)))
1142 # define ASSERT_SEGLOCK(fs) do {					\
1143 	if (!LFS_SEGLOCK_HELD(fs)) {					\
1144 		DLOG((DLOG_SEG, "lfs func %s seglock wrong (0)\n", __func__)); \
1145 	}								\
1146 } while(0)
1147 # define ASSERT_NO_SEGLOCK(fs) do {					\
1148 	if (LFS_SEGLOCK_HELD(fs)) {					\
1149 		DLOG((DLOG_SEG, "lfs func %s seglock wrong (1)\n", __func__)); \
1150 	}								\
1151 } while(0)
1152 # define ASSERT_MAYBE_SEGLOCK(x)
1153 #endif /* !notyet */
1154 
1155 __BEGIN_DECLS
1156 void lfs_itimes(struct inode *, const struct timespec *,
1157     const struct timespec *, const struct timespec *);
1158 __END_DECLS
1159 
1160 #endif /* !_UFS_LFS_LFS_H_ */
1161