xref: /netbsd-src/sys/ufs/ext2fs/ext2fs_alloc.c (revision 181254a7b1bdde6873432bffef2d2decc4b5c22f)
1 /*	$NetBSD: ext2fs_alloc.c,v 1.52 2017/05/28 16:38:55 hannken Exp $	*/
2 
3 /*
4  * Copyright (c) 1982, 1986, 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)ffs_alloc.c	8.11 (Berkeley) 10/27/94
32  *  Modified for ext2fs by Manuel Bouyer.
33  */
34 
35 /*
36  * Copyright (c) 1997 Manuel Bouyer.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  *
47  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
48  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
49  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
50  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
51  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
52  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
53  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
54  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
55  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
56  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57  *
58  *	@(#)ffs_alloc.c	8.11 (Berkeley) 10/27/94
59  *  Modified for ext2fs by Manuel Bouyer.
60  */
61 
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: ext2fs_alloc.c,v 1.52 2017/05/28 16:38:55 hannken Exp $");
64 
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/buf.h>
68 #include <sys/proc.h>
69 #include <sys/vnode.h>
70 #include <sys/mount.h>
71 #include <sys/kernel.h>
72 #include <sys/syslog.h>
73 #include <sys/kauth.h>
74 
75 #include <lib/libkern/crc16.h>
76 
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/ufsmount.h>
80 
81 #include <ufs/ext2fs/ext2fs.h>
82 #include <ufs/ext2fs/ext2fs_extern.h>
83 
84 u_long ext2gennumber;
85 
86 static daddr_t	ext2fs_alloccg(struct inode *, int, daddr_t, int);
87 static u_long	ext2fs_dirpref(struct m_ext2fs *);
88 static void	ext2fs_fserr(struct m_ext2fs *, u_int, const char *);
89 static u_long	ext2fs_hashalloc(struct inode *, int, long, int,
90 		    daddr_t (*)(struct inode *, int, daddr_t, int));
91 static daddr_t	ext2fs_nodealloccg(struct inode *, int, daddr_t, int);
92 static daddr_t	ext2fs_mapsearch(struct m_ext2fs *, char *, daddr_t);
93 static __inline void	ext2fs_cg_update(struct m_ext2fs *, int, struct ext2_gd *, int, int, int, daddr_t);
94 static uint16_t 	ext2fs_cg_get_csum(struct m_ext2fs *, int, struct ext2_gd *);
95 static void		ext2fs_init_bb(struct m_ext2fs *, int, struct ext2_gd *, char *);
96 
97 /*
98  * Allocate a block in the file system.
99  *
100  * A preference may be optionally specified. If a preference is given
101  * the following hierarchy is used to allocate a block:
102  *   1) allocate the requested block.
103  *   2) allocate a rotationally optimal block in the same cylinder.
104  *   3) allocate a block in the same cylinder group.
105  *   4) quadradically rehash into other cylinder groups, until an
106  *	  available block is located.
107  * If no block preference is given the following hierarchy is used
108  * to allocate a block:
109  *   1) allocate a block in the cylinder group that contains the
110  *	  inode for the file.
111  *   2) quadradically rehash into other cylinder groups, until an
112  *	  available block is located.
113  */
114 int
115 ext2fs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref,
116     kauth_cred_t cred, daddr_t *bnp)
117 {
118 	struct m_ext2fs *fs;
119 	daddr_t bno;
120 	int cg;
121 
122 	*bnp = 0;
123 	fs = ip->i_e2fs;
124 #ifdef DIAGNOSTIC
125 	if (cred == NOCRED)
126 		panic("ext2fs_alloc: missing credential");
127 #endif /* DIAGNOSTIC */
128 	if (fs->e2fs.e2fs_fbcount == 0)
129 		goto nospace;
130 	if (kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
131 	    NULL, NULL) != 0 &&
132 	    freespace(fs) <= 0)
133 		goto nospace;
134 	if (bpref >= fs->e2fs.e2fs_bcount)
135 		bpref = 0;
136 	if (bpref == 0)
137 		cg = ino_to_cg(fs, ip->i_number);
138 	else
139 		cg = dtog(fs, bpref);
140 	bno = (daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
141 	    ext2fs_alloccg);
142 	if (bno > 0) {
143 		ext2fs_setnblock(ip, ext2fs_nblock(ip) + btodb(fs->e2fs_bsize));
144 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
145 		*bnp = bno;
146 		return 0;
147 	}
148 nospace:
149 	ext2fs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
150 	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
151 	return ENOSPC;
152 }
153 
154 /*
155  * Allocate an inode in the file system.
156  *
157  * If allocating a directory, use ext2fs_dirpref to select the inode.
158  * If allocating in a directory, the following hierarchy is followed:
159  *   1) allocate the preferred inode.
160  *   2) allocate an inode in the same cylinder group.
161  *   3) quadradically rehash into other cylinder groups, until an
162  *	  available inode is located.
163  * If no inode preference is given the following hierarchy is used
164  * to allocate an inode:
165  *   1) allocate an inode in cylinder group 0.
166  *   2) quadradically rehash into other cylinder groups, until an
167  *	  available inode is located.
168  */
169 int
170 ext2fs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, ino_t *inop)
171 {
172 	struct inode *pip;
173 	struct m_ext2fs *fs;
174 	ino_t ino, ipref;
175 	int cg;
176 
177 	pip = VTOI(pvp);
178 	fs = pip->i_e2fs;
179 	if (fs->e2fs.e2fs_ficount == 0)
180 		goto noinodes;
181 
182 	if ((mode & IFMT) == IFDIR)
183 		cg = ext2fs_dirpref(fs);
184 	else
185 		cg = ino_to_cg(fs, pip->i_number);
186 	ipref = cg * fs->e2fs.e2fs_ipg + 1;
187 	ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg);
188 	if (ino == 0)
189 		goto noinodes;
190 
191 	*inop = ino;
192 	return 0;
193 
194 noinodes:
195 	ext2fs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
196 	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
197 	return ENOSPC;
198 }
199 
200 /*
201  * Find a cylinder to place a directory.
202  *
203  * The policy implemented by this algorithm is to select from
204  * among those cylinder groups with above the average number of
205  * free inodes, the one with the smallest number of directories.
206  */
207 static u_long
208 ext2fs_dirpref(struct m_ext2fs *fs)
209 {
210 	int cg, maxspace, mincg, avgifree;
211 
212 	avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
213 	maxspace = 0;
214 	mincg = -1;
215 	for (cg = 0; cg < fs->e2fs_ncg; cg++)
216 		if (fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree) >= avgifree) {
217 			if (mincg == -1 || fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree) > maxspace) {
218 				mincg = cg;
219 				maxspace = fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree);
220 			}
221 		}
222 	return mincg;
223 }
224 
225 /*
226  * Select the desired position for the next block in a file.  The file is
227  * logically divided into sections. The first section is composed of the
228  * direct blocks. Each additional section contains fs_maxbpg blocks.
229  *
230  * If no blocks have been allocated in the first section, the policy is to
231  * request a block in the same cylinder group as the inode that describes
232  * the file. Otherwise, the policy is to try to allocate the blocks
233  * contigously. The two fields of the ext2 inode extension (see
234  * ufs/ufs/inode.h) help this.
235  */
236 daddr_t
237 ext2fs_blkpref(struct inode *ip, daddr_t lbn, int indx,
238 		int32_t *bap /* XXX ondisk32 */)
239 {
240 	struct m_ext2fs *fs;
241 	int cg, i;
242 
243 	fs = ip->i_e2fs;
244 	/*
245 	 * if we are doing contigous lbn allocation, try to alloc blocks
246 	 * contigously on disk
247 	 */
248 
249 	if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) {
250 		return ip->i_e2fs_last_blk + 1;
251 	}
252 
253 	/*
254 	 * bap, if provided, gives us a list of blocks to which we want to
255 	 * stay close
256 	 */
257 
258 	if (bap) {
259 		for (i = indx; i >= 0 ; i--) {
260 			if (bap[i]) {
261 				return fs2h32(bap[i]) + 1;
262 			}
263 		}
264 	}
265 
266 	/* fall back to the first block of the cylinder containing the inode */
267 
268 	cg = ino_to_cg(fs, ip->i_number);
269 	return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1;
270 }
271 
272 /*
273  * Implement the cylinder overflow algorithm.
274  *
275  * The policy implemented by this algorithm is:
276  *   1) allocate the block in its requested cylinder group.
277  *   2) quadradically rehash on the cylinder group number.
278  *   3) brute force search for a free block.
279  */
280 static u_long
281 ext2fs_hashalloc(struct inode *ip, int cg, long pref, int size,
282 		daddr_t (*allocator)(struct inode *, int, daddr_t, int))
283 {
284 	struct m_ext2fs *fs;
285 	long result;
286 	int i, icg = cg;
287 
288 	fs = ip->i_e2fs;
289 	/*
290 	 * 1: preferred cylinder group
291 	 */
292 	result = (*allocator)(ip, cg, pref, size);
293 	if (result)
294 		return result;
295 	/*
296 	 * 2: quadratic rehash
297 	 */
298 	for (i = 1; i < fs->e2fs_ncg; i *= 2) {
299 		cg += i;
300 		if (cg >= fs->e2fs_ncg)
301 			cg -= fs->e2fs_ncg;
302 		result = (*allocator)(ip, cg, 0, size);
303 		if (result)
304 			return result;
305 	}
306 	/*
307 	 * 3: brute force search
308 	 * Note that we start at i == 2, since 0 was checked initially,
309 	 * and 1 is always checked in the quadratic rehash.
310 	 */
311 	cg = (icg + 2) % fs->e2fs_ncg;
312 	for (i = 2; i < fs->e2fs_ncg; i++) {
313 		result = (*allocator)(ip, cg, 0, size);
314 		if (result)
315 			return result;
316 		cg++;
317 		if (cg == fs->e2fs_ncg)
318 			cg = 0;
319 	}
320 	return 0;
321 }
322 
323 /*
324  * Determine whether a block can be allocated.
325  *
326  * Check to see if a block of the appropriate size is available,
327  * and if it is, allocate it.
328  */
329 
330 static daddr_t
331 ext2fs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
332 {
333 	struct m_ext2fs *fs;
334 	char *bbp;
335 	struct buf *bp;
336 	/* XXX ondisk32 */
337 	int error, bno, start, end, loc;
338 
339 	fs = ip->i_e2fs;
340 	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
341 		return 0;
342 	error = bread(ip->i_devvp, EXT2_FSBTODB(fs,
343 		fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap)),
344 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
345 	if (error) {
346 		return 0;
347 	}
348 	bbp = (char *)bp->b_data;
349 
350 	if (dtog(fs, bpref) != cg)
351 		bpref = 0;
352 
353 	/* initialize block bitmap now if uninit */
354 	if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
355 	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)))) {
356 		ext2fs_init_bb(fs, cg, &fs->e2fs_gd[cg], bbp);
357 		fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_BLOCK_UNINIT);
358 	}
359 
360 	if (bpref != 0) {
361 		bpref = dtogd(fs, bpref);
362 		/*
363 		 * if the requested block is available, use it
364 		 */
365 		if (isclr(bbp, bpref)) {
366 			bno = bpref;
367 			goto gotit;
368 		}
369 	}
370 	/*
371 	 * no blocks in the requested cylinder, so take next
372 	 * available one in this cylinder group.
373 	 * first try to get 8 contigous blocks, then fall back to a single
374 	 * block.
375 	 */
376 	if (bpref)
377 		start = dtogd(fs, bpref) / NBBY;
378 	else
379 		start = 0;
380 	end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
381 	for (loc = start; loc < end; loc++) {
382 		if (bbp[loc] == 0) {
383 			bno = loc * NBBY;
384 			goto gotit;
385 		}
386 	}
387 	for (loc = 0; loc < start; loc++) {
388 		if (bbp[loc] == 0) {
389 			bno = loc * NBBY;
390 			goto gotit;
391 		}
392 	}
393 
394 	bno = ext2fs_mapsearch(fs, bbp, bpref);
395 #if 0
396 	/*
397 	 * XXX jdolecek mapsearch actually never fails, it panics instead.
398 	 * If re-enabling, make sure to brele() before returning.
399 	 */
400 	if (bno < 0)
401 		return 0;
402 #endif
403 gotit:
404 #ifdef DIAGNOSTIC
405 	if (isset(bbp, (daddr_t)bno)) {
406 		printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n",
407 			cg, bno, fs->e2fs_fsmnt);
408 		panic("ext2fs_alloccg: dup alloc");
409 	}
410 #endif
411 	setbit(bbp, (daddr_t)bno);
412 	fs->e2fs.e2fs_fbcount--;
413 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], -1, 0, 0, 0);
414 	fs->e2fs_fmod = 1;
415 	bdwrite(bp);
416 	return cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno;
417 }
418 
419 /*
420  * Determine whether an inode can be allocated.
421  *
422  * Check to see if an inode is available, and if it is,
423  * allocate it using the following policy:
424  *   1) allocate the requested inode.
425  *   2) allocate the next available inode after the requested
426  *	  inode in the specified cylinder group.
427  */
428 static daddr_t
429 ext2fs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
430 {
431 	struct m_ext2fs *fs;
432 	char *ibp;
433 	struct buf *bp;
434 	int error, start, len, loc, map, i;
435 
436 	ipref--; /* to avoid a lot of (ipref -1) */
437 	if (ipref == -1)
438 		ipref = 0;
439 	fs = ip->i_e2fs;
440 	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
441 		return 0;
442 	error = bread(ip->i_devvp, EXT2_FSBTODB(fs,
443 		fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap)),
444 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
445 	if (error) {
446 		return 0;
447 	}
448 	ibp = (char *)bp->b_data;
449 
450 	KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0);
451 
452 	/* initialize inode bitmap now if uninit */
453 	if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
454 	    (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)))) {
455 		KASSERT(fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree) == fs->e2fs.e2fs_ipg);
456 		memset(ibp, 0, fs->e2fs_bsize);
457 		fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_INODE_UNINIT);
458 	}
459 
460 	if (ipref) {
461 		ipref %= fs->e2fs.e2fs_ipg;
462 		if (isclr(ibp, ipref))
463 			goto gotit;
464 	}
465 	start = ipref / NBBY;
466 	len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY);
467 	loc = skpc(0xff, len, &ibp[start]);
468 	if (loc == 0) {
469 		len = start + 1;
470 		start = 0;
471 		loc = skpc(0xff, len, &ibp[0]);
472 		if (loc == 0) {
473 			printf("cg = %d, ipref = %lld, fs = %s\n",
474 				cg, (long long)ipref, fs->e2fs_fsmnt);
475 			panic("ext2fs_nodealloccg: map corrupted");
476 			/* NOTREACHED */
477 		}
478 	}
479 	i = start + len - loc;
480 	map = ibp[i] ^ 0xff;
481 	if (map == 0) {
482 		printf("fs = %s\n", fs->e2fs_fsmnt);
483 		panic("ext2fs_nodealloccg: inode not in map");
484 	}
485 	ipref = i * NBBY + ffs(map) - 1;
486 gotit:
487 	setbit(ibp, ipref);
488 	fs->e2fs.e2fs_ficount--;
489 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
490 		0, -1, ((mode & IFMT) == IFDIR) ? 1 : 0, ipref);
491 	fs->e2fs_fmod = 1;
492 	bdwrite(bp);
493 	return cg * fs->e2fs.e2fs_ipg + ipref + 1;
494 }
495 
496 /*
497  * Free a block.
498  *
499  * The specified block is placed back in the
500  * free map.
501  */
502 void
503 ext2fs_blkfree(struct inode *ip, daddr_t bno)
504 {
505 	struct m_ext2fs *fs;
506 	char *bbp;
507 	struct buf *bp;
508 	int error, cg;
509 
510 	fs = ip->i_e2fs;
511 	cg = dtog(fs, bno);
512 
513 	KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)) == 0);
514 
515 	if ((u_int)bno >= fs->e2fs.e2fs_bcount) {
516 		printf("bad block %lld, ino %llu\n", (long long)bno,
517 		    (unsigned long long)ip->i_number);
518 		ext2fs_fserr(fs, ip->i_uid, "bad block");
519 		return;
520 	}
521 	error = bread(ip->i_devvp,
522 		EXT2_FSBTODB(fs, fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap)),
523 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
524 	if (error) {
525 		return;
526 	}
527 	bbp = (char *)bp->b_data;
528 	bno = dtogd(fs, bno);
529 	if (isclr(bbp, bno)) {
530 		printf("dev = 0x%llx, block = %lld, fs = %s\n",
531 		    (unsigned long long)ip->i_dev, (long long)bno,
532 		    fs->e2fs_fsmnt);
533 		panic("blkfree: freeing free block");
534 	}
535 	clrbit(bbp, bno);
536 	fs->e2fs.e2fs_fbcount++;
537 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], 1, 0, 0, 0);
538 	fs->e2fs_fmod = 1;
539 	bdwrite(bp);
540 }
541 
542 /*
543  * Free an inode.
544  *
545  * The specified inode is placed back in the free map.
546  */
547 int
548 ext2fs_vfree(struct vnode *pvp, ino_t ino, int mode)
549 {
550 	struct m_ext2fs *fs;
551 	char *ibp;
552 	struct inode *pip;
553 	struct buf *bp;
554 	int error, cg;
555 
556 	pip = VTOI(pvp);
557 	fs = pip->i_e2fs;
558 
559 	if ((u_int)ino > fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO)
560 		panic("ifree: range: dev = 0x%llx, ino = %llu, fs = %s",
561 		    (unsigned long long)pip->i_dev, (unsigned long long)ino,
562 		    fs->e2fs_fsmnt);
563 
564 	cg = ino_to_cg(fs, ino);
565 
566 	KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)) == 0);
567 
568 	error = bread(pip->i_devvp,
569 		EXT2_FSBTODB(fs, fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap)),
570 		(int)fs->e2fs_bsize, B_MODIFY, &bp);
571 	if (error) {
572 		return 0;
573 	}
574 	ibp = (char *)bp->b_data;
575 	ino = (ino - 1) % fs->e2fs.e2fs_ipg;
576 	if (isclr(ibp, ino)) {
577 		printf("dev = 0x%llx, ino = %llu, fs = %s\n",
578 		    (unsigned long long)pip->i_dev,
579 		    (unsigned long long)ino, fs->e2fs_fsmnt);
580 		if (fs->e2fs_ronly == 0)
581 			panic("ifree: freeing free inode");
582 	}
583 	clrbit(ibp, ino);
584 	fs->e2fs.e2fs_ficount++;
585 	ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
586 		0, 1, ((mode & IFMT) == IFDIR) ? -1 : 0, 0);
587 	fs->e2fs_fmod = 1;
588 	bdwrite(bp);
589 	return 0;
590 }
591 
592 /*
593  * Find a block in the specified cylinder group.
594  *
595  * It is a panic if a request is made to find a block if none are
596  * available.
597  */
598 
599 static daddr_t
600 ext2fs_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
601 {
602 	int start, len, loc, i, map;
603 
604 	/*
605 	 * find the fragment by searching through the free block
606 	 * map for an appropriate bit pattern
607 	 */
608 	if (bpref)
609 		start = dtogd(fs, bpref) / NBBY;
610 	else
611 		start = 0;
612 	len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
613 	loc = skpc(0xff, len, &bbp[start]);
614 	if (loc == 0) {
615 		len = start + 1;
616 		start = 0;
617 		loc = skpc(0xff, len, &bbp[start]);
618 		if (loc == 0) {
619 			printf("start = %d, len = %d, fs = %s\n",
620 				start, len, fs->e2fs_fsmnt);
621 			panic("ext2fs_alloccg: map corrupted");
622 			/* NOTREACHED */
623 		}
624 	}
625 	i = start + len - loc;
626 	map = bbp[i] ^ 0xff;
627 	if (map == 0) {
628 		printf("fs = %s\n", fs->e2fs_fsmnt);
629 		panic("ext2fs_mapsearch: block not in map");
630 	}
631 	return i * NBBY + ffs(map) - 1;
632 }
633 
634 /*
635  * Fserr prints the name of a file system with an error diagnostic.
636  *
637  * The form of the error message is:
638  *	fs: error message
639  */
640 static void
641 ext2fs_fserr(struct m_ext2fs *fs, u_int uid, const char *cp)
642 {
643 
644 	log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
645 }
646 
647 static __inline void
648 ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, int nifree, int ndirs, daddr_t ioff)
649 {
650 	/* XXX disk32 */
651 	if (nifree) {
652 		gd->ext2bgd_nifree = h2fs16(fs2h16(gd->ext2bgd_nifree) + nifree);
653 		/*
654 		 * If we allocated inode on bigger offset than what was
655 		 * ever used before, bump the itable_unused count. This
656 		 * member only ever grows, and is used only for initialization
657 		 * !INODE_ZEROED groups with used inodes. Of course, by the
658 		 * time we get here the itables are already zeroed, but
659 		 * e2fstools fsck.ext4 still checks this.
660 		 */
661 		if (E2FS_HAS_GD_CSUM(fs) && nifree < 0 && (ioff+1) >= (fs->e2fs.e2fs_ipg - fs2h16(gd->ext2bgd_itable_unused_lo))) {
662 			gd->ext2bgd_itable_unused_lo = h2fs16(fs->e2fs.e2fs_ipg - (ioff + 1));
663 		}
664 
665 		KASSERT(!E2FS_HAS_GD_CSUM(fs) || gd->ext2bgd_itable_unused_lo <= gd->ext2bgd_nifree);
666 	}
667 
668 
669 	if (nbfree)
670 		gd->ext2bgd_nbfree = h2fs16(fs2h16(gd->ext2bgd_nbfree) + nbfree);
671 
672 	if (ndirs)
673 		gd->ext2bgd_ndirs = h2fs16(fs2h16(gd->ext2bgd_ndirs) + ndirs);
674 
675 	if (E2FS_HAS_GD_CSUM(fs))
676 		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
677 }
678 
679 /*
680  * Compute group description csum. Structure data must be LE (not host).
681  * Returned as LE (disk encoding).
682  */
683 static uint16_t
684 ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd)
685 {
686 	uint16_t crc;
687 	uint32_t cg_bswapped = h2fs32((uint32_t)cg);
688 	size_t off;
689 
690 	if (!EXT2F_HAS_ROCOMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM))
691 		return 0;
692 
693 	off = offsetof(struct ext2_gd, ext2bgd_checksum);
694 
695 	crc = crc16(~0, (uint8_t *)fs->e2fs.e2fs_uuid, sizeof(fs->e2fs.e2fs_uuid));
696 	crc = crc16(crc, (uint8_t *)&cg_bswapped, sizeof(cg_bswapped));
697 	crc = crc16(crc, (uint8_t *)gd, off);
698 	/* XXX ondisk32 */
699 
700 	return h2fs16(crc);
701 }
702 
703 static void
704 ext2fs_init_bb(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, char *bbp)
705 {
706 	int i;
707 
708 	memset(bbp, 0, fs->e2fs_bsize);
709 
710 	/*
711 	 * No block was ever allocated on this cg before, so the only used
712 	 * blocks are metadata blocks on start of the group. We could optimize
713 	 * this to set by bytes, but since this is done once per the group
714 	 * in lifetime of filesystem, it really is not worth it.
715 	 */
716 	for(i=0; i < fs->e2fs.e2fs_bpg - fs2h16(gd->ext2bgd_nbfree); i++)
717 		setbit(bbp, i);
718 }
719 
720 /*
721  * Verify csum and initialize itable if not done already
722  */
723 int
724 ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ronly)
725 {
726 	/* XXX disk32 */
727 	struct ext2_gd *gd;
728 	ino_t ioff;
729 	size_t boff;
730 	struct buf *bp;
731 	int cg, i, error;
732 
733 	if (!E2FS_HAS_GD_CSUM(fs))
734 		return 0;
735 
736 	for(cg=0; cg < fs->e2fs_ncg; cg++) {
737 		gd = &fs->e2fs_gd[cg];
738 
739 		/* Verify checksum */
740 		if (gd->ext2bgd_checksum != ext2fs_cg_get_csum(fs, cg, gd)) {
741 			printf("ext2fs_cg_verify_and_initialize: group %d invalid csum\n", cg);
742 			return EINVAL;
743 		}
744 
745 		/* if mounting read-write, zero itable if not already done */
746 		if (ronly || (gd->ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0)
747 			continue;
748 
749 		/*
750 		 * We are skipping already used inodes, zero rest of itable
751 		 * blocks. First block to zero could be only partial wipe, all
752 		 * others are wiped completely. This might take a while,
753 		 * there could be many inode table blocks. We use
754 		 * delayed writes, so this shouldn't block for very
755 		 * long.
756 		 */
757 		ioff = fs->e2fs.e2fs_ipg - fs2h16(gd->ext2bgd_itable_unused_lo);
758 		boff = (ioff % fs->e2fs_ipb) * EXT2_DINODE_SIZE(fs);
759 
760 		for(i = ioff / fs->e2fs_ipb; i < fs->e2fs_itpg; i++) {
761 			if (boff) {
762 				/* partial wipe, must read old data */
763 				error = bread(devvp,
764 					EXT2_FSBTODB(fs, fs2h32(gd->ext2bgd_i_tables) + i),
765 					(int)fs->e2fs_bsize, B_MODIFY, &bp);
766 				if (error) {
767 					printf("ext2fs_cg_verify_and_initialize: can't read itable block");
768 					return error;
769 				}
770 				memset((char *)bp->b_data + boff, 0, fs->e2fs_bsize - boff);
771 				boff = 0;
772 			} else {
773 				/*
774 				 * Complete wipe, don't need to read data. This
775 				 * assumes nothing else is changing the data.
776 				 */
777 				bp = getblk(devvp,
778 					EXT2_FSBTODB(fs, fs2h32(gd->ext2bgd_i_tables) + i),
779 					(int)fs->e2fs_bsize, 0, 0);
780 				clrbuf(bp);
781 			}
782 
783 			bdwrite(bp);
784 		}
785 
786 		gd->ext2bgd_flags |= h2fs16(E2FS_BG_INODE_ZEROED);
787 		gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
788 		fs->e2fs_fmod = 1;
789 	}
790 
791 	return 0;
792 }
793