1 /* $NetBSD: lfs_alloc.c,v 1.141 2020/02/23 08:49:46 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant@hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31 /*
32 * Copyright (c) 1991, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)lfs_alloc.c 8.4 (Berkeley) 1/4/94
60 */
61
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.141 2020/02/23 08:49:46 riastradh Exp $");
64
65 #if defined(_KERNEL_OPT)
66 #include "opt_quota.h"
67 #endif
68
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/buf.h>
73 #include <sys/lock.h>
74 #include <sys/vnode.h>
75 #include <sys/syslog.h>
76 #include <sys/mount.h>
77 #include <sys/malloc.h>
78 #include <sys/pool.h>
79 #include <sys/proc.h>
80 #include <sys/kauth.h>
81
82 #include <ufs/lfs/ulfs_quotacommon.h>
83 #include <ufs/lfs/ulfs_inode.h>
84 #include <ufs/lfs/ulfsmount.h>
85 #include <ufs/lfs/ulfs_extern.h>
86
87 #include <ufs/lfs/lfs.h>
88 #include <ufs/lfs/lfs_accessors.h>
89 #include <ufs/lfs/lfs_extern.h>
90 #include <ufs/lfs/lfs_kernel.h>
91
92 /* Constants for inode free bitmap */
93 #define BMSHIFT 5 /* 2 ** 5 = 32 */
94 #define BMMASK ((1 << BMSHIFT) - 1)
95 #define SET_BITMAP_FREE(F, I) do { \
96 DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d set\n", (int)(I), \
97 (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \
98 (F)->lfs_ino_bitmap[(I) >> BMSHIFT] |= (1U << ((I) & BMMASK)); \
99 } while (0)
100 #define CLR_BITMAP_FREE(F, I) do { \
101 DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d clr\n", (int)(I), \
102 (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \
103 (F)->lfs_ino_bitmap[(I) >> BMSHIFT] &= ~(1U << ((I) & BMMASK)); \
104 } while(0)
105
106 #define ISSET_BITMAP_FREE(F, I) \
107 ((F)->lfs_ino_bitmap[(I) >> BMSHIFT] & (1U << ((I) & BMMASK)))
108
109 /*
110 * Add a new block to the Ifile, to accommodate future file creations.
111 * Called with the segment lock held.
112 */
113 int
lfs_extend_ifile(struct lfs * fs,kauth_cred_t cred)114 lfs_extend_ifile(struct lfs *fs, kauth_cred_t cred)
115 {
116 struct vnode *vp;
117 struct inode *ip;
118 IFILE64 *ifp64;
119 IFILE32 *ifp32;
120 IFILE_V1 *ifp_v1;
121 struct buf *bp, *cbp;
122 int error;
123 daddr_t i, blkno, xmax;
124 ino_t oldlast, maxino;
125 CLEANERINFO *cip;
126
127 ASSERT_SEGLOCK(fs);
128
129 /* XXX should check or assert that we aren't readonly. */
130
131 /*
132 * Get a block and extend the ifile inode. Leave the buffer for
133 * the block in bp.
134 */
135
136 vp = fs->lfs_ivnode;
137 ip = VTOI(vp);
138 blkno = lfs_lblkno(fs, ip->i_size);
139 if ((error = lfs_balloc(vp, ip->i_size, lfs_sb_getbsize(fs), cred, 0,
140 &bp)) != 0) {
141 return (error);
142 }
143 ip->i_size += lfs_sb_getbsize(fs);
144 lfs_dino_setsize(fs, ip->i_din, ip->i_size);
145 uvm_vnp_setsize(vp, ip->i_size);
146
147 /*
148 * Compute the new number of inodes, and reallocate the in-memory
149 * inode freemap.
150 */
151
152 maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) -
153 lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
154 fs->lfs_ino_bitmap = (lfs_bm_t *)
155 realloc(fs->lfs_ino_bitmap, ((maxino + BMMASK) >> BMSHIFT) *
156 sizeof(lfs_bm_t), M_SEGMENT, M_WAITOK);
157 KASSERT(fs->lfs_ino_bitmap != NULL);
158
159 /* first new inode number */
160 i = (blkno - lfs_sb_getsegtabsz(fs) - lfs_sb_getcleansz(fs)) *
161 lfs_sb_getifpb(fs);
162
163 /*
164 * We insert the new inodes at the head of the free list.
165 * Under normal circumstances, the free list is empty here,
166 * so we are also incidentally placing them at the end (which
167 * we must do if we are to keep them in order).
168 */
169 LFS_GET_HEADFREE(fs, cip, cbp, &oldlast);
170 LFS_PUT_HEADFREE(fs, cip, cbp, i);
171 KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
172 "inode 0 allocated [2]");
173
174 /* inode number to stop at (XXX: why *x*max?) */
175 xmax = i + lfs_sb_getifpb(fs);
176
177 /*
178 * Initialize the ifile block.
179 *
180 * XXX: these loops should be restructured to use the accessor
181 * functions instead of using cutpaste polymorphism.
182 */
183
184 if (fs->lfs_is64) {
185 for (ifp64 = (IFILE64 *)bp->b_data; i < xmax; ++ifp64) {
186 SET_BITMAP_FREE(fs, i);
187 ifp64->if_version = 1;
188 ifp64->if_daddr = LFS_UNUSED_DADDR;
189 ifp64->if_nextfree = ++i;
190 }
191 ifp64--;
192 ifp64->if_nextfree = oldlast;
193 } else if (lfs_sb_getversion(fs) > 1) {
194 for (ifp32 = (IFILE32 *)bp->b_data; i < xmax; ++ifp32) {
195 SET_BITMAP_FREE(fs, i);
196 ifp32->if_version = 1;
197 ifp32->if_daddr = LFS_UNUSED_DADDR;
198 ifp32->if_nextfree = ++i;
199 }
200 ifp32--;
201 ifp32->if_nextfree = oldlast;
202 } else {
203 for (ifp_v1 = (IFILE_V1 *)bp->b_data; i < xmax; ++ifp_v1) {
204 SET_BITMAP_FREE(fs, i);
205 ifp_v1->if_version = 1;
206 ifp_v1->if_daddr = LFS_UNUSED_DADDR;
207 ifp_v1->if_nextfree = ++i;
208 }
209 ifp_v1--;
210 ifp_v1->if_nextfree = oldlast;
211 }
212 LFS_PUT_TAILFREE(fs, cip, cbp, xmax - 1);
213
214 /*
215 * Write out the new block.
216 */
217
218 (void) LFS_BWRITE_LOG(bp); /* Ifile */
219
220 return 0;
221 }
222
223 /*
224 * Allocate an inode for a new file.
225 *
226 * Takes the segment lock. Also (while holding it) takes lfs_lock
227 * to frob fs->lfs_fmod.
228 *
229 * XXX: the mode argument is unused; should just get rid of it.
230 */
231 /* ARGSUSED */
232 /* VOP_BWRITE 2i times */
233 int
lfs_valloc(struct vnode * pvp,int mode,kauth_cred_t cred,ino_t * ino,int * gen)234 lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
235 ino_t *ino, int *gen)
236 {
237 struct lfs *fs;
238 struct buf *bp, *cbp;
239 IFILE *ifp;
240 int error;
241 CLEANERINFO *cip;
242
243 fs = VTOI(pvp)->i_lfs;
244 if (fs->lfs_ronly)
245 return EROFS;
246
247 ASSERT_NO_SEGLOCK(fs);
248
249 lfs_seglock(fs, SEGM_PROT);
250
251 /* Get the head of the freelist. */
252 LFS_GET_HEADFREE(fs, cip, cbp, ino);
253
254 /* paranoia */
255 KASSERT(*ino != LFS_UNUSED_INUM && *ino != LFS_IFILE_INUM);
256 DLOG((DLOG_ALLOC, "lfs_valloc: allocate inode %" PRId64 "\n",
257 *ino));
258
259 /* Update the in-memory inode freemap */
260 CLR_BITMAP_FREE(fs, *ino);
261
262 /*
263 * Fetch the ifile entry and make sure the inode is really
264 * free.
265 */
266 LFS_IENTRY(ifp, fs, *ino, bp);
267 if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR)
268 panic("lfs_valloc: inuse inode %" PRId64 " on the free list",
269 *ino);
270
271 /* Update the inode freelist head in the superblock. */
272 LFS_PUT_HEADFREE(fs, cip, cbp, lfs_if_getnextfree(fs, ifp));
273 DLOG((DLOG_ALLOC, "lfs_valloc: headfree %" PRId64 " -> %ju\n",
274 *ino, (uintmax_t)lfs_if_getnextfree(fs, ifp)));
275
276 /*
277 * Retrieve the version number from the ifile entry. It was
278 * bumped by vfree, so don't bump it again.
279 */
280 *gen = lfs_if_getversion(fs, ifp);
281
282 /* Done with ifile entry */
283 brelse(bp, 0);
284
285 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
286 /*
287 * No more inodes; extend the ifile so that the next
288 * lfs_valloc will succeed.
289 */
290 if ((error = lfs_extend_ifile(fs, cred)) != 0) {
291 /* restore the freelist */
292 LFS_PUT_HEADFREE(fs, cip, cbp, *ino);
293
294 /* unlock and return */
295 lfs_segunlock(fs);
296 return error;
297 }
298 }
299 KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
300 "inode 0 allocated [3]");
301
302 /* Set superblock modified bit */
303 mutex_enter(&lfs_lock);
304 fs->lfs_fmod = 1;
305 mutex_exit(&lfs_lock);
306
307 /* increment file count */
308 lfs_sb_addnfiles(fs, 1);
309
310 /* done */
311 lfs_segunlock(fs);
312 return 0;
313 }
314
315 /*
316 * Allocate an inode for a new file, with given inode number and
317 * version.
318 *
319 * Called in the same context as lfs_valloc and therefore shares the
320 * same locking assumptions.
321 */
322 int
lfs_valloc_fixed(struct lfs * fs,ino_t ino,int vers)323 lfs_valloc_fixed(struct lfs *fs, ino_t ino, int vers)
324 {
325 IFILE *ifp;
326 struct buf *bp, *cbp;
327 ino_t headino, thisino, oldnext;
328 CLEANERINFO *cip;
329
330 if (fs->lfs_ronly)
331 return EROFS;
332
333 ASSERT_NO_SEGLOCK(fs);
334
335 lfs_seglock(fs, SEGM_PROT);
336
337 /*
338 * If the ifile is too short to contain this inum, extend it.
339 *
340 * XXX: lfs_extend_ifile should take a size instead of always
341 * doing just one block at time.
342 */
343 while (VTOI(fs->lfs_ivnode)->i_size <= (ino /
344 lfs_sb_getifpb(fs) + lfs_sb_getcleansz(fs) + lfs_sb_getsegtabsz(fs))
345 << lfs_sb_getbshift(fs)) {
346 lfs_extend_ifile(fs, NOCRED);
347 }
348
349 /*
350 * fetch the ifile entry; get the inode freelist next pointer,
351 * and set the version as directed.
352 */
353 LFS_IENTRY(ifp, fs, ino, bp);
354 oldnext = lfs_if_getnextfree(fs, ifp);
355 lfs_if_setversion(fs, ifp, vers);
356 brelse(bp, 0);
357
358 /* Get head of inode freelist */
359 LFS_GET_HEADFREE(fs, cip, cbp, &headino);
360 if (headino == ino) {
361 /* Easy case: the inode we wanted was at the head */
362 LFS_PUT_HEADFREE(fs, cip, cbp, oldnext);
363 } else {
364 ino_t nextfree;
365
366 /* Have to find the desired inode in the freelist... */
367
368 thisino = headino;
369 while (1) {
370 /* read this ifile entry */
371 LFS_IENTRY(ifp, fs, thisino, bp);
372 nextfree = lfs_if_getnextfree(fs, ifp);
373 /* stop if we find it or we hit the end */
374 if (nextfree == ino ||
375 nextfree == LFS_UNUSED_INUM)
376 break;
377 /* nope, keep going... */
378 thisino = nextfree;
379 brelse(bp, 0);
380 }
381 if (nextfree == LFS_UNUSED_INUM) {
382 /* hit the end -- this inode is not available */
383 brelse(bp, 0);
384 lfs_segunlock(fs);
385 return ENOENT;
386 }
387 /* found it; update the next pointer */
388 lfs_if_setnextfree(fs, ifp, oldnext);
389 /* write the ifile block */
390 LFS_BWRITE_LOG(bp);
391 }
392
393 /* done */
394 lfs_segunlock(fs);
395 return 0;
396 }
397
398 #if 0
399 /*
400 * Find the highest-numbered allocated inode.
401 * This will be used to shrink the Ifile.
402 */
403 static inline ino_t
404 lfs_last_alloc_ino(struct lfs *fs)
405 {
406 ino_t ino, maxino;
407
408 maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
409 lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) *
410 lfs_sb_getifpb(fs);
411 for (ino = maxino - 1; ino > LFS_UNUSED_INUM; --ino) {
412 if (ISSET_BITMAP_FREE(fs, ino) == 0)
413 break;
414 }
415 return ino;
416 }
417 #endif
418
419 /*
420 * Find the previous (next lowest numbered) free inode, if any.
421 * If there is none, return LFS_UNUSED_INUM.
422 *
423 * XXX: locking?
424 */
425 static inline ino_t
lfs_freelist_prev(struct lfs * fs,ino_t ino)426 lfs_freelist_prev(struct lfs *fs, ino_t ino)
427 {
428 ino_t tino, bound, bb, freehdbb;
429
430 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
431 /* No free inodes at all */
432 return LFS_UNUSED_INUM;
433 }
434
435 /* Search our own word first */
436 bound = ino & ~BMMASK;
437 for (tino = ino - 1; tino >= bound && tino > LFS_UNUSED_INUM; tino--)
438 if (ISSET_BITMAP_FREE(fs, tino))
439 return tino;
440 /* If there are no lower words to search, just return */
441 if (ino >> BMSHIFT == 0)
442 return LFS_UNUSED_INUM;
443
444 /*
445 * Find a word with a free inode in it. We have to be a bit
446 * careful here since ino_t is unsigned.
447 */
448 freehdbb = (lfs_sb_getfreehd(fs) >> BMSHIFT);
449 for (bb = (ino >> BMSHIFT) - 1; bb >= freehdbb && bb > 0; --bb)
450 if (fs->lfs_ino_bitmap[bb])
451 break;
452 if (fs->lfs_ino_bitmap[bb] == 0)
453 return LFS_UNUSED_INUM;
454
455 /* Search the word we found */
456 for (tino = (bb << BMSHIFT) | BMMASK; tino >= (bb << BMSHIFT) &&
457 tino > LFS_UNUSED_INUM; tino--)
458 if (ISSET_BITMAP_FREE(fs, tino))
459 break;
460
461 /* Avoid returning reserved inode numbers */
462 if (tino <= LFS_IFILE_INUM)
463 tino = LFS_UNUSED_INUM;
464
465 return tino;
466 }
467
468 /*
469 * Free an inode.
470 *
471 * Takes lfs_seglock. Also (independently) takes vp->v_interlock.
472 */
473 /* ARGUSED */
474 /* VOP_BWRITE 2i times */
475 int
lfs_vfree(struct vnode * vp,ino_t ino,int mode)476 lfs_vfree(struct vnode *vp, ino_t ino, int mode)
477 {
478 SEGUSE *sup;
479 CLEANERINFO *cip;
480 struct buf *cbp, *bp;
481 IFILE *ifp;
482 struct inode *ip;
483 struct lfs *fs;
484 daddr_t old_iaddr;
485 ino_t otail;
486
487 /* Get the inode number and file system. */
488 ip = VTOI(vp);
489 fs = ip->i_lfs;
490 ino = ip->i_number;
491
492 /* XXX: assert not readonly */
493
494 ASSERT_NO_SEGLOCK(fs);
495 DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n", (long long)ino));
496
497 /* Drain of pending writes */
498 mutex_enter(vp->v_interlock);
499 while (lfs_sb_getversion(fs) > 1 && WRITEINPROG(vp)) {
500 cv_wait(&vp->v_cv, vp->v_interlock);
501 }
502 mutex_exit(vp->v_interlock);
503
504 lfs_seglock(fs, SEGM_PROT);
505
506 /*
507 * If the inode was in a dirop, it isn't now.
508 *
509 * XXX: why are (v_uflag & VU_DIROP) and (ip->i_state & IN_ADIROP)
510 * not updated together in one function? (and why do both exist,
511 * anyway?)
512 */
513 UNMARK_VNODE(vp);
514
515 mutex_enter(&lfs_lock);
516 if (vp->v_uflag & VU_DIROP) {
517 vp->v_uflag &= ~VU_DIROP;
518 --lfs_dirvcount;
519 --fs->lfs_dirvcount;
520 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
521 wakeup(&fs->lfs_dirvcount);
522 wakeup(&lfs_dirvcount);
523 mutex_exit(&lfs_lock);
524 vrele(vp);
525
526 /*
527 * If this inode is not going to be written any more, any
528 * segment accounting left over from its truncation needs
529 * to occur at the end of the next dirops flush. Attach
530 * them to the fs-wide list for that purpose.
531 */
532 if (LIST_FIRST(&ip->i_lfs_segdhd) != NULL) {
533 struct segdelta *sd;
534
535 while((sd = LIST_FIRST(&ip->i_lfs_segdhd)) != NULL) {
536 LIST_REMOVE(sd, list);
537 LIST_INSERT_HEAD(&fs->lfs_segdhd, sd, list);
538 }
539 }
540 } else {
541 /*
542 * If it's not a dirop, we can finalize right away.
543 */
544 mutex_exit(&lfs_lock);
545 lfs_finalize_ino_seguse(fs, ip);
546 }
547
548 /* it is no longer an unwritten inode, so update the counts */
549 mutex_enter(&lfs_lock);
550 LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED);
551 mutex_exit(&lfs_lock);
552
553 /* Turn off all inode modification flags */
554 ip->i_state &= ~IN_ALLMOD;
555
556 /* Mark it deleted */
557 ip->i_lfs_iflags |= LFSI_DELETED;
558
559 /* Mark it free in the in-memory inode freemap */
560 SET_BITMAP_FREE(fs, ino);
561
562 /*
563 * Set the ifile's inode entry to unused, increment its version number
564 * and link it onto the free chain.
565 */
566
567 /* fetch the ifile entry */
568 LFS_IENTRY(ifp, fs, ino, bp);
569
570 /* update the on-disk address (to "nowhere") */
571 old_iaddr = lfs_if_getdaddr(fs, ifp);
572 lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
573
574 /* bump the version */
575 lfs_if_setversion(fs, ifp, lfs_if_getversion(fs, ifp) + 1);
576
577 if (lfs_sb_getversion(fs) == 1) {
578 ino_t nextfree;
579
580 /* insert on freelist */
581 LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
582 lfs_if_setnextfree(fs, ifp, nextfree);
583 LFS_PUT_HEADFREE(fs, cip, cbp, ino);
584
585 /* write the ifile block */
586 (void) LFS_BWRITE_LOG(bp); /* Ifile */
587 } else {
588 ino_t tino, onf;
589
590 /*
591 * Clear the freelist next pointer and write the ifile
592 * block. XXX: why? I'm sure there must be a reason but
593 * it seems both silly and dangerous.
594 */
595 lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
596 (void) LFS_BWRITE_LOG(bp); /* Ifile */
597
598 /*
599 * Insert on freelist in order.
600 */
601
602 /* Find the next lower (by number) free inode */
603 tino = lfs_freelist_prev(fs, ino);
604
605 if (tino == LFS_UNUSED_INUM) {
606 ino_t nextfree;
607
608 /*
609 * There isn't one; put us on the freelist head.
610 */
611
612 /* reload the ifile block */
613 LFS_IENTRY(ifp, fs, ino, bp);
614 /* update the list */
615 LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
616 lfs_if_setnextfree(fs, ifp, nextfree);
617 LFS_PUT_HEADFREE(fs, cip, cbp, ino);
618 DLOG((DLOG_ALLOC, "lfs_vfree: headfree %lld -> %lld\n",
619 (long long)nextfree, (long long)ino));
620 /* write the ifile block */
621 LFS_BWRITE_LOG(bp); /* Ifile */
622
623 /* If the list was empty, set tail too */
624 LFS_GET_TAILFREE(fs, cip, cbp, &otail);
625 if (otail == LFS_UNUSED_INUM) {
626 LFS_PUT_TAILFREE(fs, cip, cbp, ino);
627 DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
628 "-> %lld\n", (long long)otail,
629 (long long)ino));
630 }
631 } else {
632 /*
633 * Insert this inode into the list after tino.
634 * We hold the segment lock so we don't have to
635 * worry about blocks being written out of order.
636 */
637
638 DLOG((DLOG_ALLOC, "lfs_vfree: insert ino %lld "
639 " after %lld\n", ino, tino));
640
641 /* load the previous inode's ifile block */
642 LFS_IENTRY(ifp, fs, tino, bp);
643 /* update the list pointer */
644 onf = lfs_if_getnextfree(fs, ifp);
645 lfs_if_setnextfree(fs, ifp, ino);
646 /* write the block */
647 LFS_BWRITE_LOG(bp); /* Ifile */
648
649 /* load this inode's ifile block */
650 LFS_IENTRY(ifp, fs, ino, bp);
651 /* update the list pointer */
652 lfs_if_setnextfree(fs, ifp, onf);
653 /* write the block */
654 LFS_BWRITE_LOG(bp); /* Ifile */
655
656 /* If we're last, put us on the tail */
657 if (onf == LFS_UNUSED_INUM) {
658 LFS_GET_TAILFREE(fs, cip, cbp, &otail);
659 LFS_PUT_TAILFREE(fs, cip, cbp, ino);
660 DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
661 "-> %lld\n", (long long)otail,
662 (long long)ino));
663 }
664 }
665 }
666 /* XXX: shouldn't this check be further up *before* we trash the fs? */
667 KASSERTMSG((ino != LFS_UNUSED_INUM), "inode 0 freed");
668
669 /*
670 * Update the segment summary for the segment where the on-disk
671 * copy used to be.
672 */
673 if (old_iaddr != LFS_UNUSED_DADDR) {
674 /* load it */
675 LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp);
676 /* the number of bytes in the segment should not become < 0 */
677 KASSERTMSG((sup->su_nbytes >= DINOSIZE(fs)),
678 "lfs_vfree: negative byte count"
679 " (segment %" PRIu32 " short by %d)\n",
680 lfs_dtosn(fs, old_iaddr),
681 (int)DINOSIZE(fs) - sup->su_nbytes);
682 /* update the number of bytes in the segment */
683 sup->su_nbytes -= DINOSIZE(fs);
684 /* write the segment entry */
685 LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp); /* Ifile */
686 }
687
688 /* Set superblock modified bit. */
689 mutex_enter(&lfs_lock);
690 fs->lfs_fmod = 1;
691 mutex_exit(&lfs_lock);
692
693 /* Decrement file count. */
694 lfs_sb_subnfiles(fs, 1);
695
696 lfs_segunlock(fs);
697
698 return (0);
699 }
700
701 /*
702 * Sort the freelist and set up the free-inode bitmap.
703 * To be called by lfs_mountfs().
704 *
705 * Takes the segmenet lock.
706 */
707 void
lfs_order_freelist(struct lfs * fs,ino_t ** orphanp,size_t * norphanp)708 lfs_order_freelist(struct lfs *fs, ino_t **orphanp, size_t *norphanp)
709 {
710 CLEANERINFO *cip;
711 IFILE *ifp = NULL;
712 struct buf *bp;
713 ino_t ino, firstino, lastino, maxino;
714 ino_t *orphan = NULL;
715 size_t norphan = 0;
716 size_t norphan_alloc = 0;
717
718 ASSERT_NO_SEGLOCK(fs);
719 lfs_seglock(fs, SEGM_PROT);
720
721 /* largest inode on fs */
722 maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
723 lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
724
725 /* allocate the in-memory inode freemap */
726 /* XXX: assert that fs->lfs_ino_bitmap is null here */
727 fs->lfs_ino_bitmap =
728 malloc(((maxino + BMMASK) >> BMSHIFT) * sizeof(lfs_bm_t),
729 M_SEGMENT, M_WAITOK | M_ZERO);
730 KASSERT(fs->lfs_ino_bitmap != NULL);
731
732 /*
733 * Scan the ifile.
734 */
735
736 firstino = lastino = LFS_UNUSED_INUM;
737 for (ino = 0; ino < maxino; ino++) {
738 /* Load this inode's ifile entry. */
739 if (ino % lfs_sb_getifpb(fs) == 0)
740 LFS_IENTRY(ifp, fs, ino, bp);
741 else
742 LFS_IENTRY_NEXT(ifp, fs);
743
744 /* Don't put zero or ifile on the free list */
745 if (ino == LFS_UNUSED_INUM || ino == LFS_IFILE_INUM)
746 continue;
747
748 /*
749 * Address orphaned files.
750 *
751 * The idea of this is to free inodes belonging to
752 * files that were unlinked but not reclaimed, I guess
753 * because if we're going to scan the whole ifile
754 * anyway it costs very little to do this. I don't
755 * immediately see any reason this should be disabled,
756 * but presumably it doesn't work... not sure what
757 * happens to such files currently. -- dholland 20160806
758 */
759 if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE(fs)) {
760 if (orphan == NULL) {
761 norphan_alloc = 32; /* XXX pulled from arse */
762 orphan = kmem_zalloc(sizeof(orphan[0]) *
763 norphan_alloc, KM_SLEEP);
764 } else if (norphan == norphan_alloc) {
765 ino_t *orphan_new;
766 if (norphan_alloc >= 4096)
767 norphan_alloc += 4096;
768 else
769 norphan_alloc *= 2;
770 orphan_new = kmem_zalloc(sizeof(orphan[0]) *
771 norphan_alloc, KM_SLEEP);
772 memcpy(orphan_new, orphan, sizeof(orphan[0]) *
773 norphan);
774 kmem_free(orphan, sizeof(orphan[0]) * norphan);
775 orphan = orphan_new;
776 }
777 orphan[norphan++] = ino;
778 }
779
780 if (lfs_if_getdaddr(fs, ifp) == LFS_UNUSED_DADDR) {
781
782 /*
783 * This inode is free. Put it on the free list.
784 */
785
786 if (firstino == LFS_UNUSED_INUM) {
787 /* XXX: assert lastino == LFS_UNUSED_INUM? */
788 /* remember the first free inode */
789 firstino = ino;
790 } else {
791 /* release this inode's ifile entry */
792 brelse(bp, 0);
793
794 /* XXX: assert lastino != LFS_UNUSED_INUM? */
795
796 /* load lastino's ifile entry */
797 LFS_IENTRY(ifp, fs, lastino, bp);
798 /* set the list pointer */
799 lfs_if_setnextfree(fs, ifp, ino);
800 /* write the block */
801 LFS_BWRITE_LOG(bp);
802
803 /* reload this inode's ifile entry */
804 LFS_IENTRY(ifp, fs, ino, bp);
805 }
806 /* remember the last free inode seen so far */
807 lastino = ino;
808
809 /* Mark this inode free in the in-memory freemap */
810 SET_BITMAP_FREE(fs, ino);
811 }
812
813 /* If moving to the next ifile block, release the buffer. */
814 if ((ino + 1) % lfs_sb_getifpb(fs) == 0)
815 brelse(bp, 0);
816 }
817
818 /* Write the freelist head and tail pointers */
819 /* XXX: do we need to mark the superblock dirty? */
820 LFS_PUT_HEADFREE(fs, cip, bp, firstino);
821 LFS_PUT_TAILFREE(fs, cip, bp, lastino);
822
823 /* done */
824 lfs_segunlock(fs);
825
826 /*
827 * Shrink the array of orphans so we don't have to carry around
828 * the allocation size.
829 */
830 if (norphan < norphan_alloc) {
831 ino_t *orphan_new = kmem_alloc(sizeof(orphan[0]) * norphan,
832 KM_SLEEP);
833 memcpy(orphan_new, orphan, sizeof(orphan[0]) * norphan);
834 kmem_free(orphan, sizeof(orphan[0]) * norphan_alloc);
835 orphan = orphan_new;
836 norphan_alloc = norphan;
837 }
838
839 *orphanp = orphan;
840 *norphanp = norphan;
841 }
842
843 /*
844 * Mark a file orphaned (unlinked but not yet reclaimed) by inode
845 * number. Do this with a magic freelist next pointer.
846 *
847 * XXX: howzabout some locking?
848 */
849 void
lfs_orphan(struct lfs * fs,ino_t ino)850 lfs_orphan(struct lfs *fs, ino_t ino)
851 {
852 IFILE *ifp;
853 struct buf *bp;
854
855 LFS_IENTRY(ifp, fs, ino, bp);
856 lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE(fs));
857 LFS_BWRITE_LOG(bp);
858 }
859
860 /*
861 * Free orphans discovered during mount. This is a separate stage
862 * because it requires fs->lfs_suflags to be set up, which is not done
863 * by the time we run lfs_order_freelist. It's possible that we could
864 * run lfs_order_freelist later (i.e., set up fs->lfs_suflags sooner)
865 * but that requires more thought than I can put into this at the
866 * moment.
867 */
868 void
lfs_free_orphans(struct lfs * fs,ino_t * orphan,size_t norphan)869 lfs_free_orphans(struct lfs *fs, ino_t *orphan, size_t norphan)
870 {
871 size_t i;
872
873 for (i = 0; i < norphan; i++) {
874 ino_t ino = orphan[i];
875 unsigned segno;
876 struct vnode *vp;
877 struct inode *ip;
878 struct buf *bp;
879 IFILE *ifp;
880 SEGUSE *sup;
881 int error;
882
883 /* Get the segment the inode is in on disk. */
884 LFS_IENTRY(ifp, fs, ino, bp);
885 segno = lfs_dtosn(fs, lfs_if_getdaddr(fs, ifp));
886 brelse(bp, 0);
887
888 /*
889 * Try to get the vnode. If we can't, tough -- hope
890 * you have backups!
891 */
892 error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE,
893 &vp);
894 if (error) {
895 printf("orphan %jd vget error %d\n", (intmax_t)ino,
896 error);
897 continue;
898 }
899
900 /*
901 * Sanity-check the inode.
902 *
903 * XXX What to do if it is still referenced?
904 */
905 ip = VTOI(vp);
906 if (ip->i_nlink != 0)
907 printf("orphan %jd nlink %d\n", (intmax_t)ino,
908 ip->i_nlink);
909
910 /*
911 * Truncate the inode, to free any blocks allocated for
912 * it, and release it, to free the inode number.
913 *
914 * XXX Isn't it redundant to truncate? Won't vput do
915 * that for us?
916 */
917 error = lfs_truncate(vp, 0, 0, NOCRED);
918 if (error)
919 printf("orphan %jd truncate error %d", (intmax_t)ino,
920 error);
921 vput(vp);
922
923 /* Update the number of bytes in the segment summary. */
924 LFS_SEGENTRY(sup, fs, segno, bp);
925 KASSERT(sup->su_nbytes >= DINOSIZE(fs));
926 sup->su_nbytes -= DINOSIZE(fs);
927 LFS_WRITESEGENTRY(sup, fs, segno, bp);
928
929 /* Drop the on-disk address. */
930 LFS_IENTRY(ifp, fs, ino, bp);
931 lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
932 LFS_BWRITE_LOG(bp);
933 }
934
935 if (orphan)
936 kmem_free(orphan, sizeof(orphan[0]) * norphan);
937 }
938