xref: /netbsd-src/sys/ufs/ext2fs/ext2fs_alloc.c (revision e4ebea9efd33d7fbff602d6288b15240e56427d2)
1 /*	$NetBSD: ext2fs_alloc.c,v 1.58 2024/05/14 19:00:44 andvar Exp $	*/
2 
3 /*
4  * Copyright (c) 1982, 1986, 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)ffs_alloc.c	8.11 (Berkeley) 10/27/94
32  *  Modified for ext2fs by Manuel Bouyer.
33  */
34 
35 /*
36  * Copyright (c) 1997 Manuel Bouyer.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  *
47  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
48  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
49  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
50  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
51  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
52  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
53  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
54  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
55  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
56  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57  *
58  *	@(#)ffs_alloc.c	8.11 (Berkeley) 10/27/94
59  *  Modified for ext2fs by Manuel Bouyer.
60  */
61 
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: ext2fs_alloc.c,v 1.58 2024/05/14 19:00:44 andvar Exp $");
64 
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/buf.h>
68 #include <sys/proc.h>
69 #include <sys/vnode.h>
70 #include <sys/mount.h>
71 #include <sys/kernel.h>
72 #include <sys/syslog.h>
73 #include <sys/kauth.h>
74 
75 #include <lib/libkern/crc16.h>
76 
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/ufsmount.h>
80 
81 #include <ufs/ext2fs/ext2fs.h>
82 #include <ufs/ext2fs/ext2fs_extern.h>
83 
84 u_long ext2gennumber;
85 
86 static daddr_t	ext2fs_alloccg(struct inode *, int, daddr_t, int);
87 static u_long	ext2fs_dirpref(struct m_ext2fs *);
88 static void	ext2fs_fserr(struct m_ext2fs *, u_int, const char *);
89 static u_long	ext2fs_hashalloc(struct inode *, int, long, int,
90 		    daddr_t (*)(struct inode *, int, daddr_t, int));
91 static daddr_t	ext2fs_nodealloccg(struct inode *, int, daddr_t, int);
92 static daddr_t	ext2fs_mapsearch(struct m_ext2fs *, char *, daddr_t);
93 static __inline void	ext2fs_cg_update(struct m_ext2fs *, int,
94     struct ext2_gd *, int, int, int, daddr_t);
95 static uint16_t ext2fs_cg_get_csum(struct m_ext2fs *, int, struct ext2_gd *);
96 static void	ext2fs_init_bb(struct m_ext2fs *, int, struct ext2_gd *,
97     char *);
98 
99 /*
100  * Allocate a block in the file system.
101  *
102  * A preference may be optionally specified. If a preference is given
103  * the following hierarchy is used to allocate a block:
104  *   1) allocate the requested block.
105  *   2) allocate a rotationally optimal block in the same cylinder.
106  *   3) allocate a block in the same cylinder group.
107  *   4) quadradically rehash into other cylinder groups, until an
108  *	  available block is located.
109  * If no block preference is given the following hierarchy is used
110  * to allocate a block:
111  *   1) allocate a block in the cylinder group that contains the
112  *	  inode for the file.
113  *   2) quadradically rehash into other cylinder groups, until an
114  *	  available block is located.
115  */
116 int
ext2fs_alloc(struct inode * ip,daddr_t lbn,daddr_t bpref,kauth_cred_t cred,daddr_t * bnp)117 ext2fs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref,
118     kauth_cred_t cred, daddr_t *bnp)
119 {
120 	struct m_ext2fs *fs;
121 	daddr_t bno;
122 	int cg;
123 
124 	*bnp = 0;
125 	fs = ip->i_e2fs;
126 #ifdef DIAGNOSTIC
127 	if (cred == NOCRED)
128 		panic("ext2fs_alloc: missing credential");
129 #endif /* DIAGNOSTIC */
130 	if (fs->e2fs.e2fs_fbcount == 0)
131 		goto nospace;
132 	if (kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
133 	    NULL, NULL) != 0 &&
134 	    freespace(fs) <= 0)
135 		goto nospace;
136 	if (bpref >= fs->e2fs.e2fs_bcount)
137 		bpref = 0;
138 	if (bpref == 0)
139 		cg = ino_to_cg(fs, ip->i_number);
140 	else
141 		cg = dtog(fs, bpref);
142 	bno = (daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
143 	    ext2fs_alloccg);
144 	if (bno > 0) {
145 		ext2fs_setnblock(ip, ext2fs_nblock(ip) + btodb(fs->e2fs_bsize));
146 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
147 		*bnp = bno;
148 		return 0;
149 	}
150 nospace:
151 	ext2fs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
152 	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
153 	return ENOSPC;
154 }
155 
156 /*
157  * Allocate an inode in the file system.
158  *
159  * If allocating a directory, use ext2fs_dirpref to select the inode.
160  * If allocating in a directory, the following hierarchy is followed:
161  *   1) allocate the preferred inode.
162  *   2) allocate an inode in the same cylinder group.
163  *   3) quadradically rehash into other cylinder groups, until an
164  *	  available inode is located.
165  * If no inode preference is given the following hierarchy is used
166  * to allocate an inode:
167  *   1) allocate an inode in cylinder group 0.
168  *   2) quadradically rehash into other cylinder groups, until an
169  *	  available inode is located.
170  */
171 int
ext2fs_valloc(struct vnode * pvp,int mode,kauth_cred_t cred,ino_t * inop)172 ext2fs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, ino_t *inop)
173 {
174 	struct inode *pip;
175 	struct m_ext2fs *fs;
176 	ino_t ino, ipref;
177 	int cg;
178 
179 	pip = VTOI(pvp);
180 	fs = pip->i_e2fs;
181 	if (fs->e2fs.e2fs_ficount == 0)
182 		goto noinodes;
183 
184 	if ((mode & IFMT) == IFDIR)
185 		cg = ext2fs_dirpref(fs);
186 	else
187 		cg = ino_to_cg(fs, pip->i_number);
188 	ipref = cg * fs->e2fs.e2fs_ipg + 1;
189 	ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg);
190 	if (ino == 0)
191 		goto noinodes;
192 
193 	*inop = ino;
194 	return 0;
195 
196 noinodes:
197 	ext2fs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
198 	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
199 	return ENOSPC;
200 }
201 
202 /*
203  * Find a cylinder to place a directory.
204  *
205  * The policy implemented by this algorithm is to select from
206  * among those cylinder groups with above the average number of
207  * free inodes, the one with the smallest number of directories.
208  */
209 static u_long
ext2fs_dirpref(struct m_ext2fs * fs)210 ext2fs_dirpref(struct m_ext2fs *fs)
211 {
212 	int cg, maxspace, mincg, avgifree;
213 
214 	avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
215 	maxspace = 0;
216 	mincg = -1;
217 	for (cg = 0; cg < fs->e2fs_ncg; cg++) {
218 		uint32_t nifree =
219 		    (fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree_hi) << 16)
220 		    | fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree);
221 		if (nifree < avgifree) {
222 			continue;
223 		}
224 		uint32_t nbfree =
225 		    (fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree_hi) << 16)
226 		    | fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree);
227 		if (mincg == -1 || nbfree > maxspace) {
228 			mincg = cg;
229 			maxspace = nbfree;
230 		}
231 	}
232 	return mincg;
233 }
234 
235 /*
236  * Select the desired position for the next block in a file.  The file is
237  * logically divided into sections. The first section is composed of the
238  * direct blocks. Each additional section contains fs_maxbpg blocks.
239  *
240  * If no blocks have been allocated in the first section, the policy is to
241  * request a block in the same cylinder group as the inode that describes
242  * the file. Otherwise, the policy is to try to allocate the blocks
243  * contiguously. The two fields of the ext2 inode extension (see
244  * ufs/ufs/inode.h) help this.
245  */
246 daddr_t
ext2fs_blkpref(struct inode * ip,daddr_t lbn,int indx,int32_t * bap)247 ext2fs_blkpref(struct inode *ip, daddr_t lbn, int indx,
248 		int32_t *bap /* XXX ondisk32 */)
249 {
250 	struct m_ext2fs *fs;
251 	int cg, i;
252 
253 	fs = ip->i_e2fs;
254 	/*
255 	 * if we are doing contiguous lbn allocation, try to alloc blocks
256 	 * contiguously on disk
257 	 */
258 
259 	if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) {
260 		return ip->i_e2fs_last_blk + 1;
261 	}
262 
263 	/*
264 	 * bap, if provided, gives us a list of blocks to which we want to
265 	 * stay close
266 	 */
267 
268 	if (bap) {
269 		for (i = indx; i >= 0 ; i--) {
270 			if (bap[i]) {
271 				return fs2h32(bap[i]) + 1;
272 			}
273 		}
274 	}
275 
276 	/* fall back to the first block of the cylinder containing the inode */
277 
278 	cg = ino_to_cg(fs, ip->i_number);
279 	return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1;
280 }
281 
282 /*
283  * Implement the cylinder overflow algorithm.
284  *
285  * The policy implemented by this algorithm is:
286  *   1) allocate the block in its requested cylinder group.
287  *   2) quadradically rehash on the cylinder group number.
288  *   3) brute force search for a free block.
289  */
290 static u_long
ext2fs_hashalloc(struct inode * ip,int cg,long pref,int size,daddr_t (* allocator)(struct inode *,int,daddr_t,int))291 ext2fs_hashalloc(struct inode *ip, int cg, long pref, int size,
292 		daddr_t (*allocator)(struct inode *, int, daddr_t, int))
293 {
294 	struct m_ext2fs *fs;
295 	long result;
296 	int i, icg = cg;
297 
298 	fs = ip->i_e2fs;
299 	/*
300 	 * 1: preferred cylinder group
301 	 */
302 	result = (*allocator)(ip, cg, pref, size);
303 	if (result)
304 		return result;
305 	/*
306 	 * 2: quadratic rehash
307 	 */
308 	for (i = 1; i < fs->e2fs_ncg; i *= 2) {
309 		cg += i;
310 		if (cg >= fs->e2fs_ncg)
311 			cg -= fs->e2fs_ncg;
312 		result = (*allocator)(ip, cg, 0, size);
313 		if (result)
314 			return result;
315 	}
316 	/*
317 	 * 3: brute force search
318 	 * Note that we start at i == 2, since 0 was checked initially,
319 	 * and 1 is always checked in the quadratic rehash.
320 	 */
321 	cg = (icg + 2) % fs->e2fs_ncg;
322 	for (i = 2; i < fs->e2fs_ncg; i++) {
323 		result = (*allocator)(ip, cg, 0, size);
324 		if (result)
325 			return result;
326 		cg++;
327 		if (cg == fs->e2fs_ncg)
328 			cg = 0;
329 	}
330 	return 0;
331 }
332 
333 /*
334  * Determine whether a block can be allocated.
335  *
336  * Check to see if a block of the appropriate size is available,
337  * and if it is, allocate it.
338  */
339 
340 static daddr_t
ext2fs_alloccg(struct inode * ip,int cg,daddr_t bpref,int size)341 ext2fs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
342 {
343 	struct m_ext2fs *fs;
344 	char *bbp;
345 	struct buf *bp;
346 	int error, bno, start, end, loc;
347 
348 	fs = ip->i_e2fs;
349 	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0 &&
350 	    fs->e2fs_gd[cg].ext2bgd_nbfree_hi == 0)
351 		return 0;
352 	error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
353 		fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap),
354 		fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
355 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
356 	if (error) {
357 		return 0;
358 	}
359 	bbp = (char *)bp->b_data;
360 
361 	if (dtog(fs, bpref) != cg)
362 		bpref = 0;
363 
364 	/* initialize block bitmap now if uninit */
365 	if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
366 	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)))) {
367 		ext2fs_init_bb(fs, cg, &fs->e2fs_gd[cg], bbp);
368 		fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_BLOCK_UNINIT);
369 	}
370 
371 	if (bpref != 0) {
372 		bpref = dtogd(fs, bpref);
373 		/*
374 		 * if the requested block is available, use it
375 		 */
376 		if (isclr(bbp, bpref)) {
377 			bno = bpref;
378 			goto gotit;
379 		}
380 	}
381 	/*
382 	 * no blocks in the requested cylinder, so take next
383 	 * available one in this cylinder group.
384 	 * first try to get 8 contiguous blocks, then fall back to a single
385 	 * block.
386 	 */
387 	if (bpref)
388 		start = dtogd(fs, bpref) / NBBY;
389 	else
390 		start = 0;
391 	end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
392 	for (loc = start; loc < end; loc++) {
393 		if (bbp[loc] == 0) {
394 			bno = loc * NBBY;
395 			goto gotit;
396 		}
397 	}
398 	for (loc = 0; loc < start; loc++) {
399 		if (bbp[loc] == 0) {
400 			bno = loc * NBBY;
401 			goto gotit;
402 		}
403 	}
404 
405 	bno = ext2fs_mapsearch(fs, bbp, bpref);
406 #if 0
407 	/*
408 	 * XXX jdolecek mapsearch actually never fails, it panics instead.
409 	 * If re-enabling, make sure to brele() before returning.
410 	 */
411 	if (bno < 0)
412 		return 0;
413 #endif
414 gotit:
415 #ifdef DIAGNOSTIC
416 	if (isset(bbp, (daddr_t)bno)) {
417 		printf("%s: cg=%d bno=%d fs=%s\n", __func__,
418 		    cg, bno, fs->e2fs_fsmnt);
419 		panic("ext2fs_alloccg: dup alloc");
420 	}
421 #endif
422 	setbit(bbp, (daddr_t)bno);
423 	fs->e2fs.e2fs_fbcount--;
424 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], -1, 0, 0, 0);
425 	fs->e2fs_fmod = 1;
426 	bdwrite(bp);
427 	return cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno;
428 }
429 
430 /*
431  * Determine whether an inode can be allocated.
432  *
433  * Check to see if an inode is available, and if it is,
434  * allocate it using the following policy:
435  *   1) allocate the requested inode.
436  *   2) allocate the next available inode after the requested
437  *	  inode in the specified cylinder group.
438  */
439 static daddr_t
ext2fs_nodealloccg(struct inode * ip,int cg,daddr_t ipref,int mode)440 ext2fs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
441 {
442 	struct m_ext2fs *fs;
443 	char *ibp;
444 	struct buf *bp;
445 	int error, start, len, loc, map, i;
446 
447 	ipref--; /* to avoid a lot of (ipref -1) */
448 	if (ipref == -1)
449 		ipref = 0;
450 	fs = ip->i_e2fs;
451 	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0 &&
452 	    fs->e2fs_gd[cg].ext2bgd_nifree_hi == 0)
453 		return 0;
454 	error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
455 		fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap),
456 		fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
457 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
458 	if (error) {
459 		return 0;
460 	}
461 	ibp = (char *)bp->b_data;
462 
463 	KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0);
464 
465 	/* initialize inode bitmap now if uninit */
466 	if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
467 	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)))) {
468 		KASSERT(fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree) == fs->e2fs.e2fs_ipg);
469 		memset(ibp, 0, fs->e2fs_bsize);
470 		fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_INODE_UNINIT);
471 	}
472 
473 	if (ipref) {
474 		ipref %= fs->e2fs.e2fs_ipg;
475 		if (isclr(ibp, ipref))
476 			goto gotit;
477 	}
478 	start = ipref / NBBY;
479 	len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY);
480 	loc = skpc(0xff, len, &ibp[start]);
481 	if (loc == 0) {
482 		len = start + 1;
483 		start = 0;
484 		loc = skpc(0xff, len, &ibp[0]);
485 		if (loc == 0) {
486 			printf("%s: cg = %d, ipref = %lld, fs = %s\n", __func__,
487 			    cg, (long long)ipref, fs->e2fs_fsmnt);
488 			panic("%s: map corrupted", __func__);
489 			/* NOTREACHED */
490 		}
491 	}
492 	i = start + len - loc;
493 	map = ibp[i] ^ 0xff;
494 	if (map == 0) {
495 		printf("%s: fs = %s\n", __func__, fs->e2fs_fsmnt);
496 		panic("%s: inode not in map", __func__);
497 	}
498 	ipref = i * NBBY + ffs(map) - 1;
499 gotit:
500 	setbit(ibp, ipref);
501 	fs->e2fs.e2fs_ficount--;
502 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
503 		0, -1, ((mode & IFMT) == IFDIR) ? 1 : 0, ipref);
504 	fs->e2fs_fmod = 1;
505 	bdwrite(bp);
506 	return cg * fs->e2fs.e2fs_ipg + ipref + 1;
507 }
508 
509 /*
510  * Free a block.
511  *
512  * The specified block is placed back in the
513  * free map.
514  */
515 void
ext2fs_blkfree(struct inode * ip,daddr_t bno)516 ext2fs_blkfree(struct inode *ip, daddr_t bno)
517 {
518 	struct m_ext2fs *fs;
519 	char *bbp;
520 	struct buf *bp;
521 	int error, cg;
522 
523 	fs = ip->i_e2fs;
524 	cg = dtog(fs, bno);
525 
526 	KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
527 	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)) == 0);
528 
529 	if ((u_int)bno >= fs->e2fs.e2fs_bcount) {
530 		printf("%s: bad block %jd, ino %ju\n",
531 		    __func__, (intmax_t)bno, (uintmax_t)ip->i_number);
532 		ext2fs_fserr(fs, ip->i_uid, "bad block");
533 		return;
534 	}
535 	error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
536 	    fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap),
537 	    fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
538 	    (int)fs->e2fs_bsize, B_MODIFY, &bp);
539 	if (error) {
540 		return;
541 	}
542 	bbp = (char *)bp->b_data;
543 	bno = dtogd(fs, bno);
544 	if (isclr(bbp, bno)) {
545 		printf("%s: dev = %#jx, block = %jd, fs = %s\n", __func__,
546 		    (uintmax_t)ip->i_dev, (intmax_t)bno,
547 		    fs->e2fs_fsmnt);
548 		panic("%s: freeing free block", __func__);
549 	}
550 	clrbit(bbp, bno);
551 	fs->e2fs.e2fs_fbcount++;
552 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], 1, 0, 0, 0);
553 	fs->e2fs_fmod = 1;
554 	bdwrite(bp);
555 }
556 
557 /*
558  * Free an inode.
559  *
560  * The specified inode is placed back in the free map.
561  */
562 int
ext2fs_vfree(struct vnode * pvp,ino_t ino,int mode)563 ext2fs_vfree(struct vnode *pvp, ino_t ino, int mode)
564 {
565 	struct m_ext2fs *fs;
566 	char *ibp;
567 	struct inode *pip;
568 	struct buf *bp;
569 	int error, cg;
570 
571 	pip = VTOI(pvp);
572 	fs = pip->i_e2fs;
573 
574 	if ((u_int)ino > fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO)
575 		panic("%s: range: dev = %#jx, ino = %ju, fs = %s",
576 		    __func__, (uintmax_t)pip->i_dev, (uintmax_t)ino,
577 		    fs->e2fs_fsmnt);
578 
579 	cg = ino_to_cg(fs, ino);
580 
581 	KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
582 	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)) == 0);
583 
584 	error = bread(pip->i_devvp, EXT2_FSBTODB64(fs,
585 	    fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap),
586 	    fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
587 	    (int)fs->e2fs_bsize, B_MODIFY, &bp);
588 	if (error) {
589 		return 0;
590 	}
591 	ibp = (char *)bp->b_data;
592 	ino = (ino - 1) % fs->e2fs.e2fs_ipg;
593 	if (isclr(ibp, ino)) {
594 		printf("%s: dev = %#jx, ino = %ju, fs = %s\n", __func__,
595 		    (uintmax_t)pip->i_dev,
596 		    (uintmax_t)ino, fs->e2fs_fsmnt);
597 		if (fs->e2fs_ronly == 0)
598 			panic("%s: freeing free inode", __func__);
599 	}
600 	clrbit(ibp, ino);
601 	fs->e2fs.e2fs_ficount++;
602 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
603 		0, 1, ((mode & IFMT) == IFDIR) ? -1 : 0, 0);
604 	fs->e2fs_fmod = 1;
605 	bdwrite(bp);
606 	return 0;
607 }
608 
609 /*
610  * Find a block in the specified cylinder group.
611  *
612  * It is a panic if a request is made to find a block if none are
613  * available.
614  */
615 
616 static daddr_t
ext2fs_mapsearch(struct m_ext2fs * fs,char * bbp,daddr_t bpref)617 ext2fs_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
618 {
619 	int start, len, loc, i, map;
620 
621 	/*
622 	 * find the fragment by searching through the free block
623 	 * map for an appropriate bit pattern
624 	 */
625 	if (bpref)
626 		start = dtogd(fs, bpref) / NBBY;
627 	else
628 		start = 0;
629 	len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
630 	loc = skpc(0xff, len, &bbp[start]);
631 	if (loc == 0) {
632 		len = start + 1;
633 		start = 0;
634 		loc = skpc(0xff, len, &bbp[start]);
635 		if (loc == 0) {
636 			printf("%s: start = %d, len = %d, fs = %s\n",
637 			    __func__, start, len, fs->e2fs_fsmnt);
638 			panic("%s: map corrupted", __func__);
639 			/* NOTREACHED */
640 		}
641 	}
642 	i = start + len - loc;
643 	map = bbp[i] ^ 0xff;
644 	if (map == 0) {
645 		printf("%s: fs = %s\n", __func__, fs->e2fs_fsmnt);
646 		panic("%s: block not in map", __func__);
647 	}
648 	return i * NBBY + ffs(map) - 1;
649 }
650 
651 /*
652  * Fserr prints the name of a file system with an error diagnostic.
653  *
654  * The form of the error message is:
655  *	fs: error message
656  */
657 static void
ext2fs_fserr(struct m_ext2fs * fs,u_int uid,const char * cp)658 ext2fs_fserr(struct m_ext2fs *fs, u_int uid, const char *cp)
659 {
660 
661 	log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
662 }
663 
664 static __inline void
ext2fs_cg_update(struct m_ext2fs * fs,int cg,struct ext2_gd * gd,int nbfree,int nifree,int ndirs,daddr_t ioff)665 ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, int nifree, int ndirs, daddr_t ioff)
666 {
667 	if (nifree) {
668 		uint32_t ext2bgd_nifree = fs2h16(gd->ext2bgd_nifree) |
669 		    (fs2h16(gd->ext2bgd_nifree_hi) << 16);
670 		ext2bgd_nifree += nifree;
671 		gd->ext2bgd_nifree = h2fs16(ext2bgd_nifree);
672 		gd->ext2bgd_nifree_hi = h2fs16(ext2bgd_nifree >> 16);
673 		/*
674 		 * If we allocated inode on bigger offset than what was
675 		 * ever used before, bump the itable_unused count. This
676 		 * member only ever grows, and is used only for initialization
677 		 * !INODE_ZEROED groups with used inodes. Of course, by the
678 		 * time we get here the itables are already zeroed, but
679 		 * e2fstools fsck.ext4 still checks this.
680 		 */
681 		if (E2FS_HAS_GD_CSUM(fs) && nifree < 0 &&
682 		    (ioff + 1) >= (fs->e2fs.e2fs_ipg -
683 		    fs2h16(gd->ext2bgd_itable_unused_lo))) {
684 			gd->ext2bgd_itable_unused_lo =
685 			    h2fs16(fs->e2fs.e2fs_ipg - (ioff + 1));
686 		}
687 
688 		KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
689 		    gd->ext2bgd_itable_unused_lo <= ext2bgd_nifree);
690 	}
691 
692 
693 	if (nbfree) {
694 		uint32_t ext2bgd_nbfree = fs2h16(gd->ext2bgd_nbfree)
695 		    | (fs2h16(gd->ext2bgd_nbfree_hi) << 16);
696 		ext2bgd_nbfree += nbfree;
697 		gd->ext2bgd_nbfree = h2fs16(ext2bgd_nbfree);
698 		gd->ext2bgd_nbfree_hi = h2fs16(ext2bgd_nbfree >> 16);
699 	}
700 
701 	if (ndirs) {
702 		uint32_t ext2bgd_ndirs = fs2h16(gd->ext2bgd_ndirs)
703 		    | (fs2h16(gd->ext2bgd_ndirs_hi) << 16);
704 		ext2bgd_ndirs += ndirs;
705 		gd->ext2bgd_ndirs = h2fs16(ext2bgd_ndirs);
706 		gd->ext2bgd_ndirs_hi = h2fs16(ext2bgd_ndirs >> 16);
707 	}
708 
709 	if (E2FS_HAS_GD_CSUM(fs))
710 		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
711 }
712 
713 static const uint32_t ext2fs_crc32c_table[256] = {
714 	0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
715 	0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
716 	0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
717 	0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
718 	0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
719 	0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
720 	0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
721 	0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
722 	0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
723 	0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
724 	0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
725 	0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
726 	0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
727 	0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
728 	0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
729 	0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
730 	0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
731 	0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
732 	0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
733 	0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
734 	0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
735 	0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
736 	0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
737 	0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
738 	0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
739 	0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
740 	0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
741 	0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
742 	0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
743 	0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
744 	0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
745 	0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
746 	0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
747 	0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
748 	0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
749 	0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
750 	0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
751 	0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
752 	0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
753 	0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
754 	0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
755 	0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
756 	0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
757 	0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
758 	0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
759 	0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
760 	0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
761 	0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
762 	0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
763 	0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
764 	0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
765 	0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
766 	0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
767 	0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
768 	0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
769 	0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
770 	0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
771 	0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
772 	0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
773 	0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
774 	0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
775 	0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
776 	0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
777 	0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
778 };
779 
780 static uint32_t
ext2fs_crc32c(uint32_t last,const void * vbuf,size_t len)781 ext2fs_crc32c(uint32_t last, const void *vbuf, size_t len)
782 {
783 	uint32_t crc = last;
784 	const uint8_t *buf = vbuf;
785 
786 	while (len--)
787 		crc = ext2fs_crc32c_table[(crc ^ *buf++) & 0xff] ^ (crc >> 8);
788 
789 	return crc;
790 }
791 
792 /*
793  * Compute group description csum. Structure data must be LE (not host).
794  * Returned as LE (disk encoding).
795  */
796 static uint16_t
ext2fs_cg_get_csum(struct m_ext2fs * fs,int cg,struct ext2_gd * gd)797 ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd)
798 {
799 	uint16_t crc;
800 	size_t cgsize = 1 << fs->e2fs_group_desc_shift;
801 	uint32_t cg_bswapped = h2fs32((uint32_t)cg);
802 	size_t off;
803 
804 	if (EXT2F_HAS_ROCOMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
805 		uint32_t crc32;
806 		uint8_t dummy[2] = {0, 0};
807 
808 		off = offsetof(struct ext2_gd, ext2bgd_checksum);
809 
810 		crc32 = ext2fs_crc32c(~0, fs->e2fs.e2fs_uuid,
811 		    sizeof(fs->e2fs.e2fs_uuid));
812 		crc32 = ext2fs_crc32c(crc32, &cg_bswapped, sizeof(cg_bswapped));
813 		crc32 = ext2fs_crc32c(crc32, gd, off);
814 		crc32 = ext2fs_crc32c(crc32, dummy, 2);
815 		crc32 = ext2fs_crc32c(crc32, gd + off + 2, cgsize - (off + 2));
816 
817 		return h2fs16(crc32 & 0xffff);
818 	}
819 
820 	if (!EXT2F_HAS_ROCOMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM))
821 		return 0;
822 
823 	off = offsetof(struct ext2_gd, ext2bgd_checksum);
824 
825 	crc = crc16(~0, (uint8_t *)fs->e2fs.e2fs_uuid,
826 	    sizeof(fs->e2fs.e2fs_uuid));
827 	crc = crc16(crc, (uint8_t *)&cg_bswapped, sizeof(cg_bswapped));
828 	crc = crc16(crc, (uint8_t *)gd, off);
829 	crc = crc16(crc, (uint8_t *)gd + off + 2, cgsize - (off + 2));
830 
831 	return h2fs16(crc);
832 }
833 
834 static void
ext2fs_init_bb(struct m_ext2fs * fs,int cg,struct ext2_gd * gd,char * bbp)835 ext2fs_init_bb(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, char *bbp)
836 {
837 	int i;
838 
839 	memset(bbp, 0, fs->e2fs_bsize);
840 
841 	/*
842 	 * No block was ever allocated on this cg before, so the only used
843 	 * blocks are metadata blocks on start of the group. We could optimize
844 	 * this to set by bytes, but since this is done once per the group
845 	 * in lifetime of filesystem, it really is not worth it.
846 	 */
847 	for(i=0; i < fs->e2fs.e2fs_bpg - fs2h16(gd->ext2bgd_nbfree); i++)
848 		setbit(bbp, i);
849 }
850 
851 /*
852  * Verify csum and initialize itable if not done already
853  */
854 int
ext2fs_cg_verify_and_initialize(struct vnode * devvp,struct m_ext2fs * fs,int ronly)855 ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ronly)
856 {
857 	struct ext2_gd *gd;
858 	ino_t ioff;
859 	size_t boff;
860 	struct buf *bp;
861 	int cg, i, error;
862 
863 	if (!E2FS_HAS_GD_CSUM(fs))
864 		return 0;
865 
866 	for(cg = 0; cg < fs->e2fs_ncg; cg++) {
867 		gd = &fs->e2fs_gd[cg];
868 
869 		/* Verify checksum */
870 		uint16_t csum = ext2fs_cg_get_csum(fs, cg, gd);
871 		if (gd->ext2bgd_checksum != csum) {
872 			printf("%s: group %d invalid csum (%#x != %#x)\n",
873 			    __func__, cg, gd->ext2bgd_checksum, csum);
874 			return EINVAL;
875 		}
876 
877 		/* if mounting read-write, zero itable if not already done */
878 		if (ronly ||
879 		    (gd->ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0)
880 			continue;
881 
882 		/*
883 		 * We are skipping already used inodes, zero rest of itable
884 		 * blocks. First block to zero could be only partial wipe, all
885 		 * others are wiped completely. This might take a while,
886 		 * there could be many inode table blocks. We use
887 		 * delayed writes, so this shouldn't block for very
888 		 * long.
889 		 */
890 		ioff = fs->e2fs.e2fs_ipg - fs2h16(gd->ext2bgd_itable_unused_lo);
891 		boff = (ioff % fs->e2fs_ipb) * EXT2_DINODE_SIZE(fs);
892 
893 		for(i = ioff / fs->e2fs_ipb; i < fs->e2fs_itpg; i++) {
894 			if (boff) {
895 				/* partial wipe, must read old data */
896 				error = bread(devvp, EXT2_FSBTODB64OFF(fs,
897 				    fs2h32(gd->ext2bgd_i_tables),
898 				    fs2h32(gd->ext2bgd_i_tables_hi), i),
899 				    (int)fs->e2fs_bsize, B_MODIFY, &bp);
900 				if (error) {
901 					printf("%s: can't read itable block",
902 					    __func__);
903 					return error;
904 				}
905 				memset((char *)bp->b_data + boff, 0,
906 				    fs->e2fs_bsize - boff);
907 				boff = 0;
908 			} else {
909 				/*
910 				 * Complete wipe, don't need to read data. This
911 				 * assumes nothing else is changing the data.
912 				 */
913 				bp = getblk(devvp, EXT2_FSBTODB64OFF(fs,
914 				    fs2h32(gd->ext2bgd_i_tables),
915 				    fs2h32(gd->ext2bgd_i_tables_hi), i),
916 				    (int)fs->e2fs_bsize, 0, 0);
917 				clrbuf(bp);
918 			}
919 
920 			bdwrite(bp);
921 		}
922 
923 		gd->ext2bgd_flags |= h2fs16(E2FS_BG_INODE_ZEROED);
924 		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
925 		fs->e2fs_fmod = 1;
926 	}
927 
928 	return 0;
929 }
930