xref: /netbsd-src/sys/ufs/lfs/lfs_alloc.c (revision d909946ca08dceb44d7d0f22ec9488679695d976)
1 /*	$NetBSD: lfs_alloc.c,v 1.133 2016/08/07 05:09:12 dholland Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Konrad E. Schroder <perseant@hhhh.org>.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 /*
32  * Copyright (c) 1991, 1993
33  *	The Regents of the University of California.  All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. Neither the name of the University nor the names of its contributors
44  *    may be used to endorse or promote products derived from this software
45  *    without specific prior written permission.
46  *
47  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57  * SUCH DAMAGE.
58  *
59  *	@(#)lfs_alloc.c	8.4 (Berkeley) 1/4/94
60  */
61 
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.133 2016/08/07 05:09:12 dholland Exp $");
64 
65 #if defined(_KERNEL_OPT)
66 #include "opt_quota.h"
67 #endif
68 
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/buf.h>
73 #include <sys/lock.h>
74 #include <sys/vnode.h>
75 #include <sys/syslog.h>
76 #include <sys/mount.h>
77 #include <sys/malloc.h>
78 #include <sys/pool.h>
79 #include <sys/proc.h>
80 #include <sys/kauth.h>
81 
82 #include <ufs/lfs/ulfs_quotacommon.h>
83 #include <ufs/lfs/ulfs_inode.h>
84 #include <ufs/lfs/ulfsmount.h>
85 #include <ufs/lfs/ulfs_extern.h>
86 
87 #include <ufs/lfs/lfs.h>
88 #include <ufs/lfs/lfs_accessors.h>
89 #include <ufs/lfs/lfs_extern.h>
90 #include <ufs/lfs/lfs_kernel.h>
91 
/*
 * In-memory inode free bitmap.
 *
 * The bitmap hangs off (F)->lfs_ino_bitmap and holds 32 inodes per
 * word: inode number I lives in word (I >> BMSHIFT), bit (I & BMMASK).
 * A set bit means the inode is free.
 *
 * The single-bit masks are built from 1U rather than 1: shifting the
 * signed int 1 left by 31 (which happens for every inode whose number
 * is 31 mod 32) is undefined behavior in C.
 *
 * All three macros evaluate I more than once; do not pass an
 * expression with side effects.
 */
#define BMSHIFT 5	/* 2 ** 5 = 32 */
#define BMMASK  ((1 << BMSHIFT) - 1)

/* Mark inode I as free in F's bitmap. */
#define SET_BITMAP_FREE(F, I) do { \
	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d set\n", (int)(I), 	\
	     (int)((I) >> BMSHIFT), (int)((I) & BMMASK)));		\
	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] |= (1U << ((I) & BMMASK));	\
} while (0)

/* Mark inode I as allocated (not free) in F's bitmap. */
#define CLR_BITMAP_FREE(F, I) do { \
	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d clr\n", (int)(I), 	\
	     (int)((I) >> BMSHIFT), (int)((I) & BMMASK)));		\
	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] &= ~(1U << ((I) & BMMASK));	\
} while (0)

/* Nonzero iff inode I is marked free in F's bitmap. */
#define ISSET_BITMAP_FREE(F, I) \
	((F)->lfs_ino_bitmap[(I) >> BMSHIFT] & (1U << ((I) & BMMASK)))
108 
109 /*
110  * Add a new block to the Ifile, to accommodate future file creations.
111  * Called with the segment lock held.
112  */
113 int
114 lfs_extend_ifile(struct lfs *fs, kauth_cred_t cred)
115 {
116 	struct vnode *vp;
117 	struct inode *ip;
118 	IFILE64 *ifp64;
119 	IFILE32 *ifp32;
120 	IFILE_V1 *ifp_v1;
121 	struct buf *bp, *cbp;
122 	int error;
123 	daddr_t i, blkno, xmax;
124 	ino_t oldlast, maxino;
125 	CLEANERINFO *cip;
126 
127 	ASSERT_SEGLOCK(fs);
128 
129 	/* XXX should check or assert that we aren't readonly. */
130 
131 	/*
132 	 * Get a block and extend the ifile inode. Leave the buffer for
133 	 * the block in bp.
134 	 */
135 
136 	vp = fs->lfs_ivnode;
137 	ip = VTOI(vp);
138 	blkno = lfs_lblkno(fs, ip->i_size);
139 	if ((error = lfs_balloc(vp, ip->i_size, lfs_sb_getbsize(fs), cred, 0,
140 				&bp)) != 0) {
141 		return (error);
142 	}
143 	ip->i_size += lfs_sb_getbsize(fs);
144 	lfs_dino_setsize(fs, ip->i_din, ip->i_size);
145 	uvm_vnp_setsize(vp, ip->i_size);
146 
147 	/*
148 	 * Compute the new number of inodes, and reallocate the in-memory
149 	 * inode freemap.
150 	 */
151 
152 	maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) -
153 		  lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
154 	fs->lfs_ino_bitmap = (lfs_bm_t *)
155 		realloc(fs->lfs_ino_bitmap, ((maxino + BMMASK) >> BMSHIFT) *
156 			sizeof(lfs_bm_t), M_SEGMENT, M_WAITOK);
157 	KASSERT(fs->lfs_ino_bitmap != NULL);
158 
159 	/* first new inode number */
160 	i = (blkno - lfs_sb_getsegtabsz(fs) - lfs_sb_getcleansz(fs)) *
161 		lfs_sb_getifpb(fs);
162 
163 	/*
164 	 * We insert the new inodes at the head of the free list.
165 	 * Under normal circumstances, the free list is empty here,
166 	 * so we are also incidentally placing them at the end (which
167 	 * we must do if we are to keep them in order).
168 	 */
169 	LFS_GET_HEADFREE(fs, cip, cbp, &oldlast);
170 	LFS_PUT_HEADFREE(fs, cip, cbp, i);
171 #ifdef DIAGNOSTIC
172 	if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM)
173 		panic("inode 0 allocated [2]");
174 #endif /* DIAGNOSTIC */
175 
176 	/* inode number to stop at (XXX: why *x*max?) */
177 	xmax = i + lfs_sb_getifpb(fs);
178 
179 	/*
180 	 * Initialize the ifile block.
181 	 *
182 	 * XXX: these loops should be restructured to use the accessor
183 	 * functions instead of using cutpaste polymorphism.
184 	 */
185 
186 	if (fs->lfs_is64) {
187 		for (ifp64 = (IFILE64 *)bp->b_data; i < xmax; ++ifp64) {
188 			SET_BITMAP_FREE(fs, i);
189 			ifp64->if_version = 1;
190 			ifp64->if_daddr = LFS_UNUSED_DADDR;
191 			ifp64->if_nextfree = ++i;
192 		}
193 		ifp64--;
194 		ifp64->if_nextfree = oldlast;
195 	} else if (lfs_sb_getversion(fs) > 1) {
196 		for (ifp32 = (IFILE32 *)bp->b_data; i < xmax; ++ifp32) {
197 			SET_BITMAP_FREE(fs, i);
198 			ifp32->if_version = 1;
199 			ifp32->if_daddr = LFS_UNUSED_DADDR;
200 			ifp32->if_nextfree = ++i;
201 		}
202 		ifp32--;
203 		ifp32->if_nextfree = oldlast;
204 	} else {
205 		for (ifp_v1 = (IFILE_V1 *)bp->b_data; i < xmax; ++ifp_v1) {
206 			SET_BITMAP_FREE(fs, i);
207 			ifp_v1->if_version = 1;
208 			ifp_v1->if_daddr = LFS_UNUSED_DADDR;
209 			ifp_v1->if_nextfree = ++i;
210 		}
211 		ifp_v1--;
212 		ifp_v1->if_nextfree = oldlast;
213 	}
214 	LFS_PUT_TAILFREE(fs, cip, cbp, xmax - 1);
215 
216 	/*
217 	 * Write out the new block.
218 	 */
219 
220 	(void) LFS_BWRITE_LOG(bp); /* Ifile */
221 
222 	return 0;
223 }
224 
225 /*
226  * Allocate an inode for a new file.
227  *
228  * Takes the segment lock. Also (while holding it) takes lfs_lock
229  * to frob fs->lfs_fmod.
230  *
231  * XXX: the mode argument is unused; should just get rid of it.
232  */
233 /* ARGSUSED */
234 /* VOP_BWRITE 2i times */
235 int
236 lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
237     ino_t *ino, int *gen)
238 {
239 	struct lfs *fs;
240 	struct buf *bp, *cbp;
241 	IFILE *ifp;
242 	int error;
243 	CLEANERINFO *cip;
244 
245 	fs = VTOI(pvp)->i_lfs;
246 	if (fs->lfs_ronly)
247 		return EROFS;
248 
249 	ASSERT_NO_SEGLOCK(fs);
250 
251 	lfs_seglock(fs, SEGM_PROT);
252 
253 	/* Get the head of the freelist. */
254 	LFS_GET_HEADFREE(fs, cip, cbp, ino);
255 
256 	/* paranoia */
257 	KASSERT(*ino != LFS_UNUSED_INUM && *ino != LFS_IFILE_INUM);
258 	DLOG((DLOG_ALLOC, "lfs_valloc: allocate inode %" PRId64 "\n",
259 	     *ino));
260 
261 	/* Update the in-memory inode freemap */
262 	CLR_BITMAP_FREE(fs, *ino);
263 
264 	/*
265 	 * Fetch the ifile entry and make sure the inode is really
266 	 * free.
267 	 */
268 	LFS_IENTRY(ifp, fs, *ino, bp);
269 	if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR)
270 		panic("lfs_valloc: inuse inode %" PRId64 " on the free list",
271 		    *ino);
272 
273 	/* Update the inode freelist head in the superblock. */
274 	LFS_PUT_HEADFREE(fs, cip, cbp, lfs_if_getnextfree(fs, ifp));
275 	DLOG((DLOG_ALLOC, "lfs_valloc: headfree %" PRId64 " -> %ju\n",
276 	     *ino, (uintmax_t)lfs_if_getnextfree(fs, ifp)));
277 
278 	/*
279 	 * Retrieve the version number from the ifile entry. It was
280 	 * bumped by vfree, so don't bump it again.
281 	 */
282 	*gen = lfs_if_getversion(fs, ifp);
283 
284 	/* Done with ifile entry */
285 	brelse(bp, 0);
286 
287 	if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
288 		/*
289 		 * No more inodes; extend the ifile so that the next
290 		 * lfs_valloc will succeed.
291 		 */
292 		if ((error = lfs_extend_ifile(fs, cred)) != 0) {
293 			/* restore the freelist */
294 			LFS_PUT_HEADFREE(fs, cip, cbp, *ino);
295 
296 			/* unlock and return */
297 			lfs_segunlock(fs);
298 			return error;
299 		}
300 	}
301 #ifdef DIAGNOSTIC
302 	if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM)
303 		panic("inode 0 allocated [3]");
304 #endif /* DIAGNOSTIC */
305 
306 	/* Set superblock modified bit */
307 	mutex_enter(&lfs_lock);
308 	fs->lfs_fmod = 1;
309 	mutex_exit(&lfs_lock);
310 
311 	/* increment file count */
312 	lfs_sb_addnfiles(fs, 1);
313 
314 	/* done */
315 	lfs_segunlock(fs);
316 	return 0;
317 }
318 
319 /*
320  * Allocate an inode for a new file, with given inode number and
321  * version.
322  *
323  * Called in the same context as lfs_valloc and therefore shares the
324  * same locking assumptions.
325  *
326  * XXX: WHICH MEANS IT OUGHT TO TAKE THE SEGLOCK WHILE FROBBING THIS
327  * XXX: STUFF. REALLY.
328  */
329 int
330 lfs_valloc_fixed(struct lfs *fs, ino_t ino, int vers)
331 {
332 	IFILE *ifp;
333 	struct buf *bp, *cbp;
334 	ino_t headino, thisino, oldnext;
335 	CLEANERINFO *cip;
336 
337 	/* XXX: check for readonly */
338 	/* XXX: assert no seglock */
339 	/* XXX: should take seglock (as noted above) */
340 
341 	/*
342 	 * If the ifile is too short to contain this inum, extend it.
343 	 *
344 	 * XXX: lfs_extend_ifile should take a size instead of always
345 	 * doing just one block at time.
346 	 */
347 	while (VTOI(fs->lfs_ivnode)->i_size <= (ino /
348 		lfs_sb_getifpb(fs) + lfs_sb_getcleansz(fs) + lfs_sb_getsegtabsz(fs))
349 		<< lfs_sb_getbshift(fs)) {
350 		lfs_extend_ifile(fs, NOCRED);
351 	}
352 
353 	/*
354 	 * fetch the ifile entry; get the inode freelist next pointer,
355 	 * and set the version as directed.
356 	 */
357 	LFS_IENTRY(ifp, fs, ino, bp);
358 	oldnext = lfs_if_getnextfree(fs, ifp);
359 	lfs_if_setversion(fs, ifp, vers);
360 	brelse(bp, 0);
361 
362 	/* Get head of inode freelist */
363 	LFS_GET_HEADFREE(fs, cip, cbp, &headino);
364 	if (headino == ino) {
365 		/* Easy case: the inode we wanted was at the head */
366 		LFS_PUT_HEADFREE(fs, cip, cbp, oldnext);
367 	} else {
368 		ino_t nextfree;
369 
370 		/* Have to find the desired inode in the freelist... */
371 
372 		thisino = headino;
373 		while (1) {
374 			/* read this ifile entry */
375 			LFS_IENTRY(ifp, fs, thisino, bp);
376 			nextfree = lfs_if_getnextfree(fs, ifp);
377 			/* stop if we find it or we hit the end */
378 			if (nextfree == ino ||
379 			    nextfree == LFS_UNUSED_INUM)
380 				break;
381 			/* nope, keep going... */
382 			thisino = nextfree;
383 			brelse(bp, 0);
384 		}
385 		if (nextfree == LFS_UNUSED_INUM) {
386 			/* hit the end -- this inode is not available */
387 			brelse(bp, 0);
388 			/* XXX release seglock (see above) */
389 			return ENOENT;
390 		}
391 		/* found it; update the next pointer */
392 		lfs_if_setnextfree(fs, ifp, oldnext);
393 		/* write the ifile block */
394 		LFS_BWRITE_LOG(bp);
395 	}
396 
397 	/* done */
398 	/* XXX release seglock (see above) */
399 	return 0;
400 }
401 
#if 0
/*
 * Return the highest-numbered inode that is not marked free in the
 * in-memory bitmap, scanning downward from the last inode the ifile
 * can describe.  The scan stops at LFS_UNUSED_INUM, which is what is
 * returned if every inode above it is free.
 *
 * This will be used to shrink the Ifile.
 */
static inline ino_t
lfs_last_alloc_ino(struct lfs *fs)
{
	ino_t cand, limit;

	/* number of inodes the ifile currently has room for */
	limit = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
	    lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) *
	    lfs_sb_getifpb(fs);

	/* walk down past every inode that is marked free */
	cand = limit - 1;
	while (cand > LFS_UNUSED_INUM && ISSET_BITMAP_FREE(fs, cand) != 0)
		--cand;

	return cand;
}
#endif
422 
423 /*
424  * Find the previous (next lowest numbered) free inode, if any.
425  * If there is none, return LFS_UNUSED_INUM.
426  *
427  * XXX: locking?
428  */
429 static inline ino_t
430 lfs_freelist_prev(struct lfs *fs, ino_t ino)
431 {
432 	ino_t tino, bound, bb, freehdbb;
433 
434 	if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
435 		/* No free inodes at all */
436 		return LFS_UNUSED_INUM;
437 	}
438 
439 	/* Search our own word first */
440 	bound = ino & ~BMMASK;
441 	for (tino = ino - 1; tino >= bound && tino > LFS_UNUSED_INUM; tino--)
442 		if (ISSET_BITMAP_FREE(fs, tino))
443 			return tino;
444 	/* If there are no lower words to search, just return */
445 	if (ino >> BMSHIFT == 0)
446 		return LFS_UNUSED_INUM;
447 
448 	/*
449 	 * Find a word with a free inode in it.  We have to be a bit
450 	 * careful here since ino_t is unsigned.
451 	 */
452 	freehdbb = (lfs_sb_getfreehd(fs) >> BMSHIFT);
453 	for (bb = (ino >> BMSHIFT) - 1; bb >= freehdbb && bb > 0; --bb)
454 		if (fs->lfs_ino_bitmap[bb])
455 			break;
456 	if (fs->lfs_ino_bitmap[bb] == 0)
457 		return LFS_UNUSED_INUM;
458 
459 	/* Search the word we found */
460 	for (tino = (bb << BMSHIFT) | BMMASK; tino >= (bb << BMSHIFT) &&
461 	     tino > LFS_UNUSED_INUM; tino--)
462 		if (ISSET_BITMAP_FREE(fs, tino))
463 			break;
464 
465 	/* Avoid returning reserved inode numbers */
466 	if (tino <= LFS_IFILE_INUM)
467 		tino = LFS_UNUSED_INUM;
468 
469 	return tino;
470 }
471 
472 /*
473  * Free an inode.
474  *
475  * Takes lfs_seglock. Also (independently) takes vp->v_interlock.
476  */
477 /* ARGUSED */
478 /* VOP_BWRITE 2i times */
479 int
480 lfs_vfree(struct vnode *vp, ino_t ino, int mode)
481 {
482 	SEGUSE *sup;
483 	CLEANERINFO *cip;
484 	struct buf *cbp, *bp;
485 	IFILE *ifp;
486 	struct inode *ip;
487 	struct lfs *fs;
488 	daddr_t old_iaddr;
489 	ino_t otail;
490 
491 	/* Get the inode number and file system. */
492 	ip = VTOI(vp);
493 	fs = ip->i_lfs;
494 	ino = ip->i_number;
495 
496 	/* XXX: assert not readonly */
497 
498 	ASSERT_NO_SEGLOCK(fs);
499 	DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n", (long long)ino));
500 
501 	/* Drain of pending writes */
502 	mutex_enter(vp->v_interlock);
503 	while (lfs_sb_getversion(fs) > 1 && WRITEINPROG(vp)) {
504 		cv_wait(&vp->v_cv, vp->v_interlock);
505 	}
506 	mutex_exit(vp->v_interlock);
507 
508 	lfs_seglock(fs, SEGM_PROT);
509 
510 	/*
511 	 * If the inode was in a dirop, it isn't now.
512 	 *
513 	 * XXX: why are (v_uflag & VU_DIROP) and (ip->i_flag & IN_ADIROP)
514 	 * not updated together in one function? (and why do both exist,
515 	 * anyway?)
516 	 */
517 	lfs_unmark_vnode(vp);
518 
519 	mutex_enter(&lfs_lock);
520 	if (vp->v_uflag & VU_DIROP) {
521 		vp->v_uflag &= ~VU_DIROP;
522 		--lfs_dirvcount;
523 		--fs->lfs_dirvcount;
524 		TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
525 		wakeup(&fs->lfs_dirvcount);
526 		wakeup(&lfs_dirvcount);
527 		mutex_exit(&lfs_lock);
528 		vrele(vp);
529 
530 		/*
531 		 * If this inode is not going to be written any more, any
532 		 * segment accounting left over from its truncation needs
533 		 * to occur at the end of the next dirops flush.  Attach
534 		 * them to the fs-wide list for that purpose.
535 		 */
536 		if (LIST_FIRST(&ip->i_lfs_segdhd) != NULL) {
537 			struct segdelta *sd;
538 
539 			while((sd = LIST_FIRST(&ip->i_lfs_segdhd)) != NULL) {
540 				LIST_REMOVE(sd, list);
541 				LIST_INSERT_HEAD(&fs->lfs_segdhd, sd, list);
542 			}
543 		}
544 	} else {
545 		/*
546 		 * If it's not a dirop, we can finalize right away.
547 		 */
548 		mutex_exit(&lfs_lock);
549 		lfs_finalize_ino_seguse(fs, ip);
550 	}
551 
552 	/* it is no longer an unwritten inode, so update the counts */
553 	mutex_enter(&lfs_lock);
554 	LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED);
555 	mutex_exit(&lfs_lock);
556 
557 	/* Turn off all inode modification flags */
558 	ip->i_flag &= ~IN_ALLMOD;
559 
560 	/* Mark it deleted */
561 	ip->i_lfs_iflags |= LFSI_DELETED;
562 
563 	/* Mark it free in the in-memory inode freemap */
564 	SET_BITMAP_FREE(fs, ino);
565 
566 	/*
567 	 * Set the ifile's inode entry to unused, increment its version number
568 	 * and link it onto the free chain.
569 	 */
570 
571 	/* fetch the ifile entry */
572 	LFS_IENTRY(ifp, fs, ino, bp);
573 
574 	/* update the on-disk address (to "nowhere") */
575 	old_iaddr = lfs_if_getdaddr(fs, ifp);
576 	lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
577 
578 	/* bump the version */
579 	lfs_if_setversion(fs, ifp, lfs_if_getversion(fs, ifp) + 1);
580 
581 	if (lfs_sb_getversion(fs) == 1) {
582 		ino_t nextfree;
583 
584 		/* insert on freelist */
585 		LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
586 		lfs_if_setnextfree(fs, ifp, nextfree);
587 		LFS_PUT_HEADFREE(fs, cip, cbp, ino);
588 
589 		/* write the ifile block */
590 		(void) LFS_BWRITE_LOG(bp); /* Ifile */
591 	} else {
592 		ino_t tino, onf;
593 
594 		/*
595 		 * Clear the freelist next pointer and write the ifile
596 		 * block. XXX: why? I'm sure there must be a reason but
597 		 * it seems both silly and dangerous.
598 		 */
599 		lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
600 		(void) LFS_BWRITE_LOG(bp); /* Ifile */
601 
602 		/*
603 		 * Insert on freelist in order.
604 		 */
605 
606 		/* Find the next lower (by number) free inode */
607 		tino = lfs_freelist_prev(fs, ino);
608 
609 		if (tino == LFS_UNUSED_INUM) {
610 			ino_t nextfree;
611 
612 			/*
613 			 * There isn't one; put us on the freelist head.
614 			 */
615 
616 			/* reload the ifile block */
617 			LFS_IENTRY(ifp, fs, ino, bp);
618 			/* update the list */
619 			LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
620 			lfs_if_setnextfree(fs, ifp, nextfree);
621 			LFS_PUT_HEADFREE(fs, cip, cbp, ino);
622 			DLOG((DLOG_ALLOC, "lfs_vfree: headfree %lld -> %lld\n",
623 			     (long long)nextfree, (long long)ino));
624 			/* write the ifile block */
625 			LFS_BWRITE_LOG(bp); /* Ifile */
626 
627 			/* If the list was empty, set tail too */
628 			LFS_GET_TAILFREE(fs, cip, cbp, &otail);
629 			if (otail == LFS_UNUSED_INUM) {
630 				LFS_PUT_TAILFREE(fs, cip, cbp, ino);
631 				DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
632 				      "-> %lld\n", (long long)otail,
633 				      (long long)ino));
634 			}
635 		} else {
636 			/*
637 			 * Insert this inode into the list after tino.
638 			 * We hold the segment lock so we don't have to
639 			 * worry about blocks being written out of order.
640 			 */
641 
642 			DLOG((DLOG_ALLOC, "lfs_vfree: insert ino %lld "
643 			      " after %lld\n", ino, tino));
644 
645 			/* load the previous inode's ifile block */
646 			LFS_IENTRY(ifp, fs, tino, bp);
647 			/* update the list pointer */
648 			onf = lfs_if_getnextfree(fs, ifp);
649 			lfs_if_setnextfree(fs, ifp, ino);
650 			/* write the block */
651 			LFS_BWRITE_LOG(bp);	/* Ifile */
652 
653 			/* load this inode's ifile block */
654 			LFS_IENTRY(ifp, fs, ino, bp);
655 			/* update the list pointer */
656 			lfs_if_setnextfree(fs, ifp, onf);
657 			/* write the block */
658 			LFS_BWRITE_LOG(bp);	/* Ifile */
659 
660 			/* If we're last, put us on the tail */
661 			if (onf == LFS_UNUSED_INUM) {
662 				LFS_GET_TAILFREE(fs, cip, cbp, &otail);
663 				LFS_PUT_TAILFREE(fs, cip, cbp, ino);
664 				DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
665 				      "-> %lld\n", (long long)otail,
666 				      (long long)ino));
667 			}
668 		}
669 	}
670 #ifdef DIAGNOSTIC
671 	/* XXX: shouldn't this check be further up *before* we trash the fs? */
672 	if (ino == LFS_UNUSED_INUM) {
673 		panic("inode 0 freed");
674 	}
675 #endif /* DIAGNOSTIC */
676 
677 	/*
678 	 * Update the segment summary for the segment where the on-disk
679 	 * copy used to be.
680 	 */
681 	if (old_iaddr != LFS_UNUSED_DADDR) {
682 		/* load it */
683 		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp);
684 #ifdef DIAGNOSTIC
685 		/* the number of bytes in the segment should not become < 0 */
686 		if (sup->su_nbytes < DINOSIZE(fs)) {
687 			printf("lfs_vfree: negative byte count"
688 			       " (segment %" PRIu32 " short by %d)\n",
689 			       lfs_dtosn(fs, old_iaddr),
690 			       (int)DINOSIZE(fs) -
691 				    sup->su_nbytes);
692 			panic("lfs_vfree: negative byte count");
693 			sup->su_nbytes = DINOSIZE(fs);
694 		}
695 #endif
696 		/* update the number of bytes in the segment */
697 		sup->su_nbytes -= DINOSIZE(fs);
698 		/* write the segment entry */
699 		LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp); /* Ifile */
700 	}
701 
702 	/* Set superblock modified bit. */
703 	mutex_enter(&lfs_lock);
704 	fs->lfs_fmod = 1;
705 	mutex_exit(&lfs_lock);
706 
707 	/* Decrement file count. */
708 	lfs_sb_subnfiles(fs, 1);
709 
710 	lfs_segunlock(fs);
711 
712 	return (0);
713 }
714 
715 /*
716  * Sort the freelist and set up the free-inode bitmap.
717  * To be called by lfs_mountfs().
718  *
719  * Takes the segmenet lock.
720  */
721 void
722 lfs_order_freelist(struct lfs *fs)
723 {
724 	CLEANERINFO *cip;
725 	IFILE *ifp = NULL;
726 	struct buf *bp;
727 	ino_t ino, firstino, lastino, maxino;
728 #ifdef notyet
729 	struct vnode *vp;
730 #endif
731 
732 	ASSERT_NO_SEGLOCK(fs);
733 	lfs_seglock(fs, SEGM_PROT);
734 
735 	/* largest inode on fs */
736 	maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
737 		  lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
738 
739 	/* allocate the in-memory inode freemap */
740 	/* XXX: assert that fs->lfs_ino_bitmap is null here */
741 	fs->lfs_ino_bitmap =
742 		malloc(((maxino + BMMASK) >> BMSHIFT) * sizeof(lfs_bm_t),
743 		       M_SEGMENT, M_WAITOK | M_ZERO);
744 	KASSERT(fs->lfs_ino_bitmap != NULL);
745 
746 	/*
747 	 * Scan the ifile.
748 	 */
749 
750 	firstino = lastino = LFS_UNUSED_INUM;
751 	for (ino = 0; ino < maxino; ino++) {
752 		/* Load this inode's ifile entry. */
753 		if (ino % lfs_sb_getifpb(fs) == 0)
754 			LFS_IENTRY(ifp, fs, ino, bp);
755 		else
756 			LFS_IENTRY_NEXT(ifp, fs);
757 
758 		/* Don't put zero or ifile on the free list */
759 		if (ino == LFS_UNUSED_INUM || ino == LFS_IFILE_INUM)
760 			continue;
761 
762 #ifdef notyet
763 		/*
764 		 * Address orphaned files.
765 		 *
766 		 * The idea of this is to free inodes belonging to
767 		 * files that were unlinked but not reclaimed, I guess
768 		 * because if we're going to scan the whole ifile
769 		 * anyway it costs very little to do this. I don't
770 		 * immediately see any reason this should be disabled,
771 		 * but presumably it doesn't work... not sure what
772 		 * happens to such files currently. -- dholland 20160806
773 		 */
774 		if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE &&
775 		    VFS_VGET(fs->lfs_ivnode->v_mount, ino, &vp) == 0) {
776 			unsigned segno;
777 
778 			/* get the segment the inode in on disk  */
779 			segno = lfs_dtosn(fs, lfs_if_getdaddr(fs, ifp));
780 
781 			/* truncate the inode */
782 			lfs_truncate(vp, 0, 0, NOCRED);
783 			vput(vp);
784 
785 			/* load the segment summary */
786 			LFS_SEGENTRY(sup, fs, segno, bp);
787 			/* update the number of bytes in the segment */
788 			KASSERT(sup->su_nbytes >= DINOSIZE(fs));
789 			sup->su_nbytes -= DINOSIZE(fs);
790 			/* write the segment summary */
791 			LFS_WRITESEGENTRY(sup, fs, segno, bp);
792 
793 			/* Drop the on-disk address */
794 			lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
795 			/* write the ifile entry */
796 			LFS_BWRITE_LOG(bp);
797 
798 			/*
799 			 * and reload it (XXX: why? I guess
800 			 * LFS_BWRITE_LOG drops it...)
801 			 */
802 			LFS_IENTRY(ifp, fs, ino, bp);
803 
804 			/* Fall through to next if block */
805 		}
806 #endif
807 
808 		if (lfs_if_getdaddr(fs, ifp) == LFS_UNUSED_DADDR) {
809 
810 			/*
811 			 * This inode is free. Put it on the free list.
812 			 */
813 
814 			if (firstino == LFS_UNUSED_INUM) {
815 				/* XXX: assert lastino == LFS_UNUSED_INUM? */
816 				/* remember the first free inode */
817 				firstino = ino;
818 			} else {
819 				/* release this inode's ifile entry */
820 				brelse(bp, 0);
821 
822 				/* XXX: assert lastino != LFS_UNUSED_INUM? */
823 
824 				/* load lastino's ifile entry */
825 				LFS_IENTRY(ifp, fs, lastino, bp);
826 				/* set the list pointer */
827 				lfs_if_setnextfree(fs, ifp, ino);
828 				/* write the block */
829 				LFS_BWRITE_LOG(bp);
830 
831 				/* reload this inode's ifile entry */
832 				LFS_IENTRY(ifp, fs, ino, bp);
833 			}
834 			/* remember the last free inode seen so far */
835 			lastino = ino;
836 
837 			/* Mark this inode free in the in-memory freemap */
838 			SET_BITMAP_FREE(fs, ino);
839 		}
840 
841 		/* If moving to the next ifile block, release the buffer. */
842 		if ((ino + 1) % lfs_sb_getifpb(fs) == 0)
843 			brelse(bp, 0);
844 	}
845 
846 	/* Write the freelist head and tail pointers */
847 	/* XXX: do we need to mark the superblock dirty? */
848 	LFS_PUT_HEADFREE(fs, cip, bp, firstino);
849 	LFS_PUT_TAILFREE(fs, cip, bp, lastino);
850 
851 	/* done */
852 	lfs_segunlock(fs);
853 }
854 
855 /*
856  * Mark a file orphaned (unlinked but not yet reclaimed) by inode
857  * number. Do this with a magic freelist next pointer.
858  *
859  * XXX: howzabout some locking?
860  */
861 void
862 lfs_orphan(struct lfs *fs, ino_t ino)
863 {
864 	IFILE *ifp;
865 	struct buf *bp;
866 
867 	LFS_IENTRY(ifp, fs, ino, bp);
868 	lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE);
869 	LFS_BWRITE_LOG(bp);
870 }
871