xref: /netbsd-src/sys/ufs/lfs/lfs_alloc.c (revision 9bf46809165dbc5e69e70487ac78e5e8db0403e2)
1 /*	$NetBSD: lfs_alloc.c,v 1.141 2020/02/23 08:49:46 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Konrad E. Schroder <perseant@hhhh.org>.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 /*
32  * Copyright (c) 1991, 1993
33  *	The Regents of the University of California.  All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. Neither the name of the University nor the names of its contributors
44  *    may be used to endorse or promote products derived from this software
45  *    without specific prior written permission.
46  *
47  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57  * SUCH DAMAGE.
58  *
59  *	@(#)lfs_alloc.c	8.4 (Berkeley) 1/4/94
60  */
61 
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.141 2020/02/23 08:49:46 riastradh Exp $");
64 
65 #if defined(_KERNEL_OPT)
66 #include "opt_quota.h"
67 #endif
68 
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/buf.h>
73 #include <sys/lock.h>
74 #include <sys/vnode.h>
75 #include <sys/syslog.h>
76 #include <sys/mount.h>
77 #include <sys/malloc.h>
78 #include <sys/pool.h>
79 #include <sys/proc.h>
80 #include <sys/kauth.h>
81 
82 #include <ufs/lfs/ulfs_quotacommon.h>
83 #include <ufs/lfs/ulfs_inode.h>
84 #include <ufs/lfs/ulfsmount.h>
85 #include <ufs/lfs/ulfs_extern.h>
86 
87 #include <ufs/lfs/lfs.h>
88 #include <ufs/lfs/lfs_accessors.h>
89 #include <ufs/lfs/lfs_extern.h>
90 #include <ufs/lfs/lfs_kernel.h>
91 
92 /* Constants for inode free bitmap */
93 #define BMSHIFT 5	/* 2 ** 5 = 32 */
94 #define BMMASK  ((1 << BMSHIFT) - 1)
95 #define SET_BITMAP_FREE(F, I) do { \
96 	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d set\n", (int)(I), 	\
97 	     (int)((I) >> BMSHIFT), (int)((I) & BMMASK)));		\
98 	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] |= (1U << ((I) & BMMASK));	\
99 } while (0)
100 #define CLR_BITMAP_FREE(F, I) do { \
101 	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d clr\n", (int)(I), 	\
102 	     (int)((I) >> BMSHIFT), (int)((I) & BMMASK)));		\
103 	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] &= ~(1U << ((I) & BMMASK));	\
104 } while(0)
105 
106 #define ISSET_BITMAP_FREE(F, I) \
107 	((F)->lfs_ino_bitmap[(I) >> BMSHIFT] & (1U << ((I) & BMMASK)))
108 
109 /*
110  * Add a new block to the Ifile, to accommodate future file creations.
111  * Called with the segment lock held.
112  */
113 int
lfs_extend_ifile(struct lfs * fs,kauth_cred_t cred)114 lfs_extend_ifile(struct lfs *fs, kauth_cred_t cred)
115 {
116 	struct vnode *vp;
117 	struct inode *ip;
118 	IFILE64 *ifp64;
119 	IFILE32 *ifp32;
120 	IFILE_V1 *ifp_v1;
121 	struct buf *bp, *cbp;
122 	int error;
123 	daddr_t i, blkno, xmax;
124 	ino_t oldlast, maxino;
125 	CLEANERINFO *cip;
126 
127 	ASSERT_SEGLOCK(fs);
128 
129 	/* XXX should check or assert that we aren't readonly. */
130 
131 	/*
132 	 * Get a block and extend the ifile inode. Leave the buffer for
133 	 * the block in bp.
134 	 */
135 
136 	vp = fs->lfs_ivnode;
137 	ip = VTOI(vp);
138 	blkno = lfs_lblkno(fs, ip->i_size);
139 	if ((error = lfs_balloc(vp, ip->i_size, lfs_sb_getbsize(fs), cred, 0,
140 				&bp)) != 0) {
141 		return (error);
142 	}
143 	ip->i_size += lfs_sb_getbsize(fs);
144 	lfs_dino_setsize(fs, ip->i_din, ip->i_size);
145 	uvm_vnp_setsize(vp, ip->i_size);
146 
147 	/*
148 	 * Compute the new number of inodes, and reallocate the in-memory
149 	 * inode freemap.
150 	 */
151 
152 	maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) -
153 		  lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
154 	fs->lfs_ino_bitmap = (lfs_bm_t *)
155 		realloc(fs->lfs_ino_bitmap, ((maxino + BMMASK) >> BMSHIFT) *
156 			sizeof(lfs_bm_t), M_SEGMENT, M_WAITOK);
157 	KASSERT(fs->lfs_ino_bitmap != NULL);
158 
159 	/* first new inode number */
160 	i = (blkno - lfs_sb_getsegtabsz(fs) - lfs_sb_getcleansz(fs)) *
161 		lfs_sb_getifpb(fs);
162 
163 	/*
164 	 * We insert the new inodes at the head of the free list.
165 	 * Under normal circumstances, the free list is empty here,
166 	 * so we are also incidentally placing them at the end (which
167 	 * we must do if we are to keep them in order).
168 	 */
169 	LFS_GET_HEADFREE(fs, cip, cbp, &oldlast);
170 	LFS_PUT_HEADFREE(fs, cip, cbp, i);
171 	KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
172 	    "inode 0 allocated [2]");
173 
174 	/* inode number to stop at (XXX: why *x*max?) */
175 	xmax = i + lfs_sb_getifpb(fs);
176 
177 	/*
178 	 * Initialize the ifile block.
179 	 *
180 	 * XXX: these loops should be restructured to use the accessor
181 	 * functions instead of using cutpaste polymorphism.
182 	 */
183 
184 	if (fs->lfs_is64) {
185 		for (ifp64 = (IFILE64 *)bp->b_data; i < xmax; ++ifp64) {
186 			SET_BITMAP_FREE(fs, i);
187 			ifp64->if_version = 1;
188 			ifp64->if_daddr = LFS_UNUSED_DADDR;
189 			ifp64->if_nextfree = ++i;
190 		}
191 		ifp64--;
192 		ifp64->if_nextfree = oldlast;
193 	} else if (lfs_sb_getversion(fs) > 1) {
194 		for (ifp32 = (IFILE32 *)bp->b_data; i < xmax; ++ifp32) {
195 			SET_BITMAP_FREE(fs, i);
196 			ifp32->if_version = 1;
197 			ifp32->if_daddr = LFS_UNUSED_DADDR;
198 			ifp32->if_nextfree = ++i;
199 		}
200 		ifp32--;
201 		ifp32->if_nextfree = oldlast;
202 	} else {
203 		for (ifp_v1 = (IFILE_V1 *)bp->b_data; i < xmax; ++ifp_v1) {
204 			SET_BITMAP_FREE(fs, i);
205 			ifp_v1->if_version = 1;
206 			ifp_v1->if_daddr = LFS_UNUSED_DADDR;
207 			ifp_v1->if_nextfree = ++i;
208 		}
209 		ifp_v1--;
210 		ifp_v1->if_nextfree = oldlast;
211 	}
212 	LFS_PUT_TAILFREE(fs, cip, cbp, xmax - 1);
213 
214 	/*
215 	 * Write out the new block.
216 	 */
217 
218 	(void) LFS_BWRITE_LOG(bp); /* Ifile */
219 
220 	return 0;
221 }
222 
223 /*
224  * Allocate an inode for a new file.
225  *
226  * Takes the segment lock. Also (while holding it) takes lfs_lock
227  * to frob fs->lfs_fmod.
228  *
229  * XXX: the mode argument is unused; should just get rid of it.
230  */
231 /* ARGSUSED */
232 /* VOP_BWRITE 2i times */
233 int
lfs_valloc(struct vnode * pvp,int mode,kauth_cred_t cred,ino_t * ino,int * gen)234 lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
235     ino_t *ino, int *gen)
236 {
237 	struct lfs *fs;
238 	struct buf *bp, *cbp;
239 	IFILE *ifp;
240 	int error;
241 	CLEANERINFO *cip;
242 
243 	fs = VTOI(pvp)->i_lfs;
244 	if (fs->lfs_ronly)
245 		return EROFS;
246 
247 	ASSERT_NO_SEGLOCK(fs);
248 
249 	lfs_seglock(fs, SEGM_PROT);
250 
251 	/* Get the head of the freelist. */
252 	LFS_GET_HEADFREE(fs, cip, cbp, ino);
253 
254 	/* paranoia */
255 	KASSERT(*ino != LFS_UNUSED_INUM && *ino != LFS_IFILE_INUM);
256 	DLOG((DLOG_ALLOC, "lfs_valloc: allocate inode %" PRId64 "\n",
257 	     *ino));
258 
259 	/* Update the in-memory inode freemap */
260 	CLR_BITMAP_FREE(fs, *ino);
261 
262 	/*
263 	 * Fetch the ifile entry and make sure the inode is really
264 	 * free.
265 	 */
266 	LFS_IENTRY(ifp, fs, *ino, bp);
267 	if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR)
268 		panic("lfs_valloc: inuse inode %" PRId64 " on the free list",
269 		    *ino);
270 
271 	/* Update the inode freelist head in the superblock. */
272 	LFS_PUT_HEADFREE(fs, cip, cbp, lfs_if_getnextfree(fs, ifp));
273 	DLOG((DLOG_ALLOC, "lfs_valloc: headfree %" PRId64 " -> %ju\n",
274 	     *ino, (uintmax_t)lfs_if_getnextfree(fs, ifp)));
275 
276 	/*
277 	 * Retrieve the version number from the ifile entry. It was
278 	 * bumped by vfree, so don't bump it again.
279 	 */
280 	*gen = lfs_if_getversion(fs, ifp);
281 
282 	/* Done with ifile entry */
283 	brelse(bp, 0);
284 
285 	if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
286 		/*
287 		 * No more inodes; extend the ifile so that the next
288 		 * lfs_valloc will succeed.
289 		 */
290 		if ((error = lfs_extend_ifile(fs, cred)) != 0) {
291 			/* restore the freelist */
292 			LFS_PUT_HEADFREE(fs, cip, cbp, *ino);
293 
294 			/* unlock and return */
295 			lfs_segunlock(fs);
296 			return error;
297 		}
298 	}
299 	KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
300 	    "inode 0 allocated [3]");
301 
302 	/* Set superblock modified bit */
303 	mutex_enter(&lfs_lock);
304 	fs->lfs_fmod = 1;
305 	mutex_exit(&lfs_lock);
306 
307 	/* increment file count */
308 	lfs_sb_addnfiles(fs, 1);
309 
310 	/* done */
311 	lfs_segunlock(fs);
312 	return 0;
313 }
314 
315 /*
316  * Allocate an inode for a new file, with given inode number and
317  * version.
318  *
319  * Called in the same context as lfs_valloc and therefore shares the
320  * same locking assumptions.
321  */
322 int
lfs_valloc_fixed(struct lfs * fs,ino_t ino,int vers)323 lfs_valloc_fixed(struct lfs *fs, ino_t ino, int vers)
324 {
325 	IFILE *ifp;
326 	struct buf *bp, *cbp;
327 	ino_t headino, thisino, oldnext;
328 	CLEANERINFO *cip;
329 
330 	if (fs->lfs_ronly)
331 		return EROFS;
332 
333 	ASSERT_NO_SEGLOCK(fs);
334 
335 	lfs_seglock(fs, SEGM_PROT);
336 
337 	/*
338 	 * If the ifile is too short to contain this inum, extend it.
339 	 *
340 	 * XXX: lfs_extend_ifile should take a size instead of always
341 	 * doing just one block at time.
342 	 */
343 	while (VTOI(fs->lfs_ivnode)->i_size <= (ino /
344 		lfs_sb_getifpb(fs) + lfs_sb_getcleansz(fs) + lfs_sb_getsegtabsz(fs))
345 		<< lfs_sb_getbshift(fs)) {
346 		lfs_extend_ifile(fs, NOCRED);
347 	}
348 
349 	/*
350 	 * fetch the ifile entry; get the inode freelist next pointer,
351 	 * and set the version as directed.
352 	 */
353 	LFS_IENTRY(ifp, fs, ino, bp);
354 	oldnext = lfs_if_getnextfree(fs, ifp);
355 	lfs_if_setversion(fs, ifp, vers);
356 	brelse(bp, 0);
357 
358 	/* Get head of inode freelist */
359 	LFS_GET_HEADFREE(fs, cip, cbp, &headino);
360 	if (headino == ino) {
361 		/* Easy case: the inode we wanted was at the head */
362 		LFS_PUT_HEADFREE(fs, cip, cbp, oldnext);
363 	} else {
364 		ino_t nextfree;
365 
366 		/* Have to find the desired inode in the freelist... */
367 
368 		thisino = headino;
369 		while (1) {
370 			/* read this ifile entry */
371 			LFS_IENTRY(ifp, fs, thisino, bp);
372 			nextfree = lfs_if_getnextfree(fs, ifp);
373 			/* stop if we find it or we hit the end */
374 			if (nextfree == ino ||
375 			    nextfree == LFS_UNUSED_INUM)
376 				break;
377 			/* nope, keep going... */
378 			thisino = nextfree;
379 			brelse(bp, 0);
380 		}
381 		if (nextfree == LFS_UNUSED_INUM) {
382 			/* hit the end -- this inode is not available */
383 			brelse(bp, 0);
384 			lfs_segunlock(fs);
385 			return ENOENT;
386 		}
387 		/* found it; update the next pointer */
388 		lfs_if_setnextfree(fs, ifp, oldnext);
389 		/* write the ifile block */
390 		LFS_BWRITE_LOG(bp);
391 	}
392 
393 	/* done */
394 	lfs_segunlock(fs);
395 	return 0;
396 }
397 
398 #if 0
399 /*
400  * Find the highest-numbered allocated inode.
401  * This will be used to shrink the Ifile.
402  */
403 static inline ino_t
404 lfs_last_alloc_ino(struct lfs *fs)
405 {
406 	ino_t ino, maxino;
407 
408 	maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
409 		  lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) *
410 		lfs_sb_getifpb(fs);
411 	for (ino = maxino - 1; ino > LFS_UNUSED_INUM; --ino) {
412 		if (ISSET_BITMAP_FREE(fs, ino) == 0)
413 			break;
414 	}
415 	return ino;
416 }
417 #endif
418 
419 /*
420  * Find the previous (next lowest numbered) free inode, if any.
421  * If there is none, return LFS_UNUSED_INUM.
422  *
423  * XXX: locking?
424  */
425 static inline ino_t
lfs_freelist_prev(struct lfs * fs,ino_t ino)426 lfs_freelist_prev(struct lfs *fs, ino_t ino)
427 {
428 	ino_t tino, bound, bb, freehdbb;
429 
430 	if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
431 		/* No free inodes at all */
432 		return LFS_UNUSED_INUM;
433 	}
434 
435 	/* Search our own word first */
436 	bound = ino & ~BMMASK;
437 	for (tino = ino - 1; tino >= bound && tino > LFS_UNUSED_INUM; tino--)
438 		if (ISSET_BITMAP_FREE(fs, tino))
439 			return tino;
440 	/* If there are no lower words to search, just return */
441 	if (ino >> BMSHIFT == 0)
442 		return LFS_UNUSED_INUM;
443 
444 	/*
445 	 * Find a word with a free inode in it.  We have to be a bit
446 	 * careful here since ino_t is unsigned.
447 	 */
448 	freehdbb = (lfs_sb_getfreehd(fs) >> BMSHIFT);
449 	for (bb = (ino >> BMSHIFT) - 1; bb >= freehdbb && bb > 0; --bb)
450 		if (fs->lfs_ino_bitmap[bb])
451 			break;
452 	if (fs->lfs_ino_bitmap[bb] == 0)
453 		return LFS_UNUSED_INUM;
454 
455 	/* Search the word we found */
456 	for (tino = (bb << BMSHIFT) | BMMASK; tino >= (bb << BMSHIFT) &&
457 	     tino > LFS_UNUSED_INUM; tino--)
458 		if (ISSET_BITMAP_FREE(fs, tino))
459 			break;
460 
461 	/* Avoid returning reserved inode numbers */
462 	if (tino <= LFS_IFILE_INUM)
463 		tino = LFS_UNUSED_INUM;
464 
465 	return tino;
466 }
467 
468 /*
469  * Free an inode.
470  *
471  * Takes lfs_seglock. Also (independently) takes vp->v_interlock.
472  */
473 /* ARGUSED */
474 /* VOP_BWRITE 2i times */
475 int
lfs_vfree(struct vnode * vp,ino_t ino,int mode)476 lfs_vfree(struct vnode *vp, ino_t ino, int mode)
477 {
478 	SEGUSE *sup;
479 	CLEANERINFO *cip;
480 	struct buf *cbp, *bp;
481 	IFILE *ifp;
482 	struct inode *ip;
483 	struct lfs *fs;
484 	daddr_t old_iaddr;
485 	ino_t otail;
486 
487 	/* Get the inode number and file system. */
488 	ip = VTOI(vp);
489 	fs = ip->i_lfs;
490 	ino = ip->i_number;
491 
492 	/* XXX: assert not readonly */
493 
494 	ASSERT_NO_SEGLOCK(fs);
495 	DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n", (long long)ino));
496 
497 	/* Drain of pending writes */
498 	mutex_enter(vp->v_interlock);
499 	while (lfs_sb_getversion(fs) > 1 && WRITEINPROG(vp)) {
500 		cv_wait(&vp->v_cv, vp->v_interlock);
501 	}
502 	mutex_exit(vp->v_interlock);
503 
504 	lfs_seglock(fs, SEGM_PROT);
505 
506 	/*
507 	 * If the inode was in a dirop, it isn't now.
508 	 *
509 	 * XXX: why are (v_uflag & VU_DIROP) and (ip->i_state & IN_ADIROP)
510 	 * not updated together in one function? (and why do both exist,
511 	 * anyway?)
512 	 */
513 	UNMARK_VNODE(vp);
514 
515 	mutex_enter(&lfs_lock);
516 	if (vp->v_uflag & VU_DIROP) {
517 		vp->v_uflag &= ~VU_DIROP;
518 		--lfs_dirvcount;
519 		--fs->lfs_dirvcount;
520 		TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
521 		wakeup(&fs->lfs_dirvcount);
522 		wakeup(&lfs_dirvcount);
523 		mutex_exit(&lfs_lock);
524 		vrele(vp);
525 
526 		/*
527 		 * If this inode is not going to be written any more, any
528 		 * segment accounting left over from its truncation needs
529 		 * to occur at the end of the next dirops flush.  Attach
530 		 * them to the fs-wide list for that purpose.
531 		 */
532 		if (LIST_FIRST(&ip->i_lfs_segdhd) != NULL) {
533 			struct segdelta *sd;
534 
535 			while((sd = LIST_FIRST(&ip->i_lfs_segdhd)) != NULL) {
536 				LIST_REMOVE(sd, list);
537 				LIST_INSERT_HEAD(&fs->lfs_segdhd, sd, list);
538 			}
539 		}
540 	} else {
541 		/*
542 		 * If it's not a dirop, we can finalize right away.
543 		 */
544 		mutex_exit(&lfs_lock);
545 		lfs_finalize_ino_seguse(fs, ip);
546 	}
547 
548 	/* it is no longer an unwritten inode, so update the counts */
549 	mutex_enter(&lfs_lock);
550 	LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED);
551 	mutex_exit(&lfs_lock);
552 
553 	/* Turn off all inode modification flags */
554 	ip->i_state &= ~IN_ALLMOD;
555 
556 	/* Mark it deleted */
557 	ip->i_lfs_iflags |= LFSI_DELETED;
558 
559 	/* Mark it free in the in-memory inode freemap */
560 	SET_BITMAP_FREE(fs, ino);
561 
562 	/*
563 	 * Set the ifile's inode entry to unused, increment its version number
564 	 * and link it onto the free chain.
565 	 */
566 
567 	/* fetch the ifile entry */
568 	LFS_IENTRY(ifp, fs, ino, bp);
569 
570 	/* update the on-disk address (to "nowhere") */
571 	old_iaddr = lfs_if_getdaddr(fs, ifp);
572 	lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
573 
574 	/* bump the version */
575 	lfs_if_setversion(fs, ifp, lfs_if_getversion(fs, ifp) + 1);
576 
577 	if (lfs_sb_getversion(fs) == 1) {
578 		ino_t nextfree;
579 
580 		/* insert on freelist */
581 		LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
582 		lfs_if_setnextfree(fs, ifp, nextfree);
583 		LFS_PUT_HEADFREE(fs, cip, cbp, ino);
584 
585 		/* write the ifile block */
586 		(void) LFS_BWRITE_LOG(bp); /* Ifile */
587 	} else {
588 		ino_t tino, onf;
589 
590 		/*
591 		 * Clear the freelist next pointer and write the ifile
592 		 * block. XXX: why? I'm sure there must be a reason but
593 		 * it seems both silly and dangerous.
594 		 */
595 		lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
596 		(void) LFS_BWRITE_LOG(bp); /* Ifile */
597 
598 		/*
599 		 * Insert on freelist in order.
600 		 */
601 
602 		/* Find the next lower (by number) free inode */
603 		tino = lfs_freelist_prev(fs, ino);
604 
605 		if (tino == LFS_UNUSED_INUM) {
606 			ino_t nextfree;
607 
608 			/*
609 			 * There isn't one; put us on the freelist head.
610 			 */
611 
612 			/* reload the ifile block */
613 			LFS_IENTRY(ifp, fs, ino, bp);
614 			/* update the list */
615 			LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
616 			lfs_if_setnextfree(fs, ifp, nextfree);
617 			LFS_PUT_HEADFREE(fs, cip, cbp, ino);
618 			DLOG((DLOG_ALLOC, "lfs_vfree: headfree %lld -> %lld\n",
619 			     (long long)nextfree, (long long)ino));
620 			/* write the ifile block */
621 			LFS_BWRITE_LOG(bp); /* Ifile */
622 
623 			/* If the list was empty, set tail too */
624 			LFS_GET_TAILFREE(fs, cip, cbp, &otail);
625 			if (otail == LFS_UNUSED_INUM) {
626 				LFS_PUT_TAILFREE(fs, cip, cbp, ino);
627 				DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
628 				      "-> %lld\n", (long long)otail,
629 				      (long long)ino));
630 			}
631 		} else {
632 			/*
633 			 * Insert this inode into the list after tino.
634 			 * We hold the segment lock so we don't have to
635 			 * worry about blocks being written out of order.
636 			 */
637 
638 			DLOG((DLOG_ALLOC, "lfs_vfree: insert ino %lld "
639 			      " after %lld\n", ino, tino));
640 
641 			/* load the previous inode's ifile block */
642 			LFS_IENTRY(ifp, fs, tino, bp);
643 			/* update the list pointer */
644 			onf = lfs_if_getnextfree(fs, ifp);
645 			lfs_if_setnextfree(fs, ifp, ino);
646 			/* write the block */
647 			LFS_BWRITE_LOG(bp);	/* Ifile */
648 
649 			/* load this inode's ifile block */
650 			LFS_IENTRY(ifp, fs, ino, bp);
651 			/* update the list pointer */
652 			lfs_if_setnextfree(fs, ifp, onf);
653 			/* write the block */
654 			LFS_BWRITE_LOG(bp);	/* Ifile */
655 
656 			/* If we're last, put us on the tail */
657 			if (onf == LFS_UNUSED_INUM) {
658 				LFS_GET_TAILFREE(fs, cip, cbp, &otail);
659 				LFS_PUT_TAILFREE(fs, cip, cbp, ino);
660 				DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
661 				      "-> %lld\n", (long long)otail,
662 				      (long long)ino));
663 			}
664 		}
665 	}
666 	/* XXX: shouldn't this check be further up *before* we trash the fs? */
667 	KASSERTMSG((ino != LFS_UNUSED_INUM), "inode 0 freed");
668 
669 	/*
670 	 * Update the segment summary for the segment where the on-disk
671 	 * copy used to be.
672 	 */
673 	if (old_iaddr != LFS_UNUSED_DADDR) {
674 		/* load it */
675 		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp);
676 		/* the number of bytes in the segment should not become < 0 */
677 		KASSERTMSG((sup->su_nbytes >= DINOSIZE(fs)),
678 		    "lfs_vfree: negative byte count"
679 		    " (segment %" PRIu32 " short by %d)\n",
680 		    lfs_dtosn(fs, old_iaddr),
681 		    (int)DINOSIZE(fs) - sup->su_nbytes);
682 		/* update the number of bytes in the segment */
683 		sup->su_nbytes -= DINOSIZE(fs);
684 		/* write the segment entry */
685 		LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp); /* Ifile */
686 	}
687 
688 	/* Set superblock modified bit. */
689 	mutex_enter(&lfs_lock);
690 	fs->lfs_fmod = 1;
691 	mutex_exit(&lfs_lock);
692 
693 	/* Decrement file count. */
694 	lfs_sb_subnfiles(fs, 1);
695 
696 	lfs_segunlock(fs);
697 
698 	return (0);
699 }
700 
701 /*
702  * Sort the freelist and set up the free-inode bitmap.
703  * To be called by lfs_mountfs().
704  *
705  * Takes the segmenet lock.
706  */
707 void
lfs_order_freelist(struct lfs * fs,ino_t ** orphanp,size_t * norphanp)708 lfs_order_freelist(struct lfs *fs, ino_t **orphanp, size_t *norphanp)
709 {
710 	CLEANERINFO *cip;
711 	IFILE *ifp = NULL;
712 	struct buf *bp;
713 	ino_t ino, firstino, lastino, maxino;
714 	ino_t *orphan = NULL;
715 	size_t norphan = 0;
716 	size_t norphan_alloc = 0;
717 
718 	ASSERT_NO_SEGLOCK(fs);
719 	lfs_seglock(fs, SEGM_PROT);
720 
721 	/* largest inode on fs */
722 	maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
723 		  lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
724 
725 	/* allocate the in-memory inode freemap */
726 	/* XXX: assert that fs->lfs_ino_bitmap is null here */
727 	fs->lfs_ino_bitmap =
728 		malloc(((maxino + BMMASK) >> BMSHIFT) * sizeof(lfs_bm_t),
729 		       M_SEGMENT, M_WAITOK | M_ZERO);
730 	KASSERT(fs->lfs_ino_bitmap != NULL);
731 
732 	/*
733 	 * Scan the ifile.
734 	 */
735 
736 	firstino = lastino = LFS_UNUSED_INUM;
737 	for (ino = 0; ino < maxino; ino++) {
738 		/* Load this inode's ifile entry. */
739 		if (ino % lfs_sb_getifpb(fs) == 0)
740 			LFS_IENTRY(ifp, fs, ino, bp);
741 		else
742 			LFS_IENTRY_NEXT(ifp, fs);
743 
744 		/* Don't put zero or ifile on the free list */
745 		if (ino == LFS_UNUSED_INUM || ino == LFS_IFILE_INUM)
746 			continue;
747 
748 		/*
749 		 * Address orphaned files.
750 		 *
751 		 * The idea of this is to free inodes belonging to
752 		 * files that were unlinked but not reclaimed, I guess
753 		 * because if we're going to scan the whole ifile
754 		 * anyway it costs very little to do this. I don't
755 		 * immediately see any reason this should be disabled,
756 		 * but presumably it doesn't work... not sure what
757 		 * happens to such files currently. -- dholland 20160806
758 		 */
759 		if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE(fs)) {
760 			if (orphan == NULL) {
761 				norphan_alloc = 32; /* XXX pulled from arse */
762 				orphan = kmem_zalloc(sizeof(orphan[0]) *
763 				    norphan_alloc, KM_SLEEP);
764 			} else if (norphan == norphan_alloc) {
765 				ino_t *orphan_new;
766 				if (norphan_alloc >= 4096)
767 					norphan_alloc += 4096;
768 				else
769 					norphan_alloc *= 2;
770 				orphan_new = kmem_zalloc(sizeof(orphan[0]) *
771 				    norphan_alloc, KM_SLEEP);
772 				memcpy(orphan_new, orphan, sizeof(orphan[0]) *
773 				    norphan);
774 				kmem_free(orphan, sizeof(orphan[0]) * norphan);
775 				orphan = orphan_new;
776 			}
777 			orphan[norphan++] = ino;
778 		}
779 
780 		if (lfs_if_getdaddr(fs, ifp) == LFS_UNUSED_DADDR) {
781 
782 			/*
783 			 * This inode is free. Put it on the free list.
784 			 */
785 
786 			if (firstino == LFS_UNUSED_INUM) {
787 				/* XXX: assert lastino == LFS_UNUSED_INUM? */
788 				/* remember the first free inode */
789 				firstino = ino;
790 			} else {
791 				/* release this inode's ifile entry */
792 				brelse(bp, 0);
793 
794 				/* XXX: assert lastino != LFS_UNUSED_INUM? */
795 
796 				/* load lastino's ifile entry */
797 				LFS_IENTRY(ifp, fs, lastino, bp);
798 				/* set the list pointer */
799 				lfs_if_setnextfree(fs, ifp, ino);
800 				/* write the block */
801 				LFS_BWRITE_LOG(bp);
802 
803 				/* reload this inode's ifile entry */
804 				LFS_IENTRY(ifp, fs, ino, bp);
805 			}
806 			/* remember the last free inode seen so far */
807 			lastino = ino;
808 
809 			/* Mark this inode free in the in-memory freemap */
810 			SET_BITMAP_FREE(fs, ino);
811 		}
812 
813 		/* If moving to the next ifile block, release the buffer. */
814 		if ((ino + 1) % lfs_sb_getifpb(fs) == 0)
815 			brelse(bp, 0);
816 	}
817 
818 	/* Write the freelist head and tail pointers */
819 	/* XXX: do we need to mark the superblock dirty? */
820 	LFS_PUT_HEADFREE(fs, cip, bp, firstino);
821 	LFS_PUT_TAILFREE(fs, cip, bp, lastino);
822 
823 	/* done */
824 	lfs_segunlock(fs);
825 
826 	/*
827 	 * Shrink the array of orphans so we don't have to carry around
828 	 * the allocation size.
829 	 */
830 	if (norphan < norphan_alloc) {
831 		ino_t *orphan_new = kmem_alloc(sizeof(orphan[0]) * norphan,
832 		    KM_SLEEP);
833 		memcpy(orphan_new, orphan, sizeof(orphan[0]) * norphan);
834 		kmem_free(orphan, sizeof(orphan[0]) * norphan_alloc);
835 		orphan = orphan_new;
836 		norphan_alloc = norphan;
837 	}
838 
839 	*orphanp = orphan;
840 	*norphanp = norphan;
841 }
842 
843 /*
844  * Mark a file orphaned (unlinked but not yet reclaimed) by inode
845  * number. Do this with a magic freelist next pointer.
846  *
847  * XXX: howzabout some locking?
848  */
849 void
lfs_orphan(struct lfs * fs,ino_t ino)850 lfs_orphan(struct lfs *fs, ino_t ino)
851 {
852 	IFILE *ifp;
853 	struct buf *bp;
854 
855 	LFS_IENTRY(ifp, fs, ino, bp);
856 	lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE(fs));
857 	LFS_BWRITE_LOG(bp);
858 }
859 
860 /*
861  * Free orphans discovered during mount.  This is a separate stage
862  * because it requires fs->lfs_suflags to be set up, which is not done
863  * by the time we run lfs_order_freelist.  It's possible that we could
864  * run lfs_order_freelist later (i.e., set up fs->lfs_suflags sooner)
865  * but that requires more thought than I can put into this at the
866  * moment.
867  */
868 void
lfs_free_orphans(struct lfs * fs,ino_t * orphan,size_t norphan)869 lfs_free_orphans(struct lfs *fs, ino_t *orphan, size_t norphan)
870 {
871 	size_t i;
872 
873 	for (i = 0; i < norphan; i++) {
874 		ino_t ino = orphan[i];
875 		unsigned segno;
876 		struct vnode *vp;
877 		struct inode *ip;
878 		struct buf *bp;
879 		IFILE *ifp;
880 		SEGUSE *sup;
881 		int error;
882 
883 		/* Get the segment the inode is in on disk.  */
884 		LFS_IENTRY(ifp, fs, ino, bp);
885 		segno = lfs_dtosn(fs, lfs_if_getdaddr(fs, ifp));
886 		brelse(bp, 0);
887 
888 		/*
889 		 * Try to get the vnode.  If we can't, tough -- hope
890 		 * you have backups!
891 		 */
892 		error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE,
893 		    &vp);
894 		if (error) {
895 			printf("orphan %jd vget error %d\n", (intmax_t)ino,
896 			    error);
897 			continue;
898 		}
899 
900 		/*
901 		 * Sanity-check the inode.
902 		 *
903 		 * XXX What to do if it is still referenced?
904 		 */
905 		ip = VTOI(vp);
906 		if (ip->i_nlink != 0)
907 			printf("orphan %jd nlink %d\n", (intmax_t)ino,
908 			    ip->i_nlink);
909 
910 		/*
911 		 * Truncate the inode, to free any blocks allocated for
912 		 * it, and release it, to free the inode number.
913 		 *
914 		 * XXX Isn't it redundant to truncate?  Won't vput do
915 		 * that for us?
916 		 */
917 		error = lfs_truncate(vp, 0, 0, NOCRED);
918 		if (error)
919 			printf("orphan %jd truncate error %d", (intmax_t)ino,
920 			    error);
921 		vput(vp);
922 
923 		/* Update the number of bytes in the segment summary.  */
924 		LFS_SEGENTRY(sup, fs, segno, bp);
925 		KASSERT(sup->su_nbytes >= DINOSIZE(fs));
926 		sup->su_nbytes -= DINOSIZE(fs);
927 		LFS_WRITESEGENTRY(sup, fs, segno, bp);
928 
929 		/* Drop the on-disk address.  */
930 		LFS_IENTRY(ifp, fs, ino, bp);
931 		lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
932 		LFS_BWRITE_LOG(bp);
933 	}
934 
935 	if (orphan)
936 		kmem_free(orphan, sizeof(orphan[0]) * norphan);
937 }
938