1 /* $NetBSD: ext2fs_alloc.c,v 1.58 2024/05/14 19:00:44 andvar Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94
32 * Modified for ext2fs by Manuel Bouyer.
33 */
34
35 /*
36 * Copyright (c) 1997 Manuel Bouyer.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
48 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
49 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
50 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
51 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
52 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
53 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
54 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
55 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
56 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57 *
58 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94
59 * Modified for ext2fs by Manuel Bouyer.
60 */
61
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: ext2fs_alloc.c,v 1.58 2024/05/14 19:00:44 andvar Exp $");
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/buf.h>
68 #include <sys/proc.h>
69 #include <sys/vnode.h>
70 #include <sys/mount.h>
71 #include <sys/kernel.h>
72 #include <sys/syslog.h>
73 #include <sys/kauth.h>
74
75 #include <lib/libkern/crc16.h>
76
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/ufsmount.h>
80
81 #include <ufs/ext2fs/ext2fs.h>
82 #include <ufs/ext2fs/ext2fs_extern.h>
83
84 u_long ext2gennumber;
85
86 static daddr_t ext2fs_alloccg(struct inode *, int, daddr_t, int);
87 static u_long ext2fs_dirpref(struct m_ext2fs *);
88 static void ext2fs_fserr(struct m_ext2fs *, u_int, const char *);
89 static u_long ext2fs_hashalloc(struct inode *, int, long, int,
90 daddr_t (*)(struct inode *, int, daddr_t, int));
91 static daddr_t ext2fs_nodealloccg(struct inode *, int, daddr_t, int);
92 static daddr_t ext2fs_mapsearch(struct m_ext2fs *, char *, daddr_t);
93 static __inline void ext2fs_cg_update(struct m_ext2fs *, int,
94 struct ext2_gd *, int, int, int, daddr_t);
95 static uint16_t ext2fs_cg_get_csum(struct m_ext2fs *, int, struct ext2_gd *);
96 static void ext2fs_init_bb(struct m_ext2fs *, int, struct ext2_gd *,
97 char *);
98
99 /*
100 * Allocate a block in the file system.
101 *
102 * A preference may be optionally specified. If a preference is given
103 * the following hierarchy is used to allocate a block:
104 * 1) allocate the requested block.
105 * 2) allocate a rotationally optimal block in the same cylinder.
106 * 3) allocate a block in the same cylinder group.
107 * 4) quadradically rehash into other cylinder groups, until an
108 * available block is located.
109 * If no block preference is given the following hierarchy is used
110 * to allocate a block:
111 * 1) allocate a block in the cylinder group that contains the
112 * inode for the file.
113 * 2) quadradically rehash into other cylinder groups, until an
114 * available block is located.
115 */
116 int
ext2fs_alloc(struct inode * ip,daddr_t lbn,daddr_t bpref,kauth_cred_t cred,daddr_t * bnp)117 ext2fs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref,
118 kauth_cred_t cred, daddr_t *bnp)
119 {
120 struct m_ext2fs *fs;
121 daddr_t bno;
122 int cg;
123
124 *bnp = 0;
125 fs = ip->i_e2fs;
126 #ifdef DIAGNOSTIC
127 if (cred == NOCRED)
128 panic("ext2fs_alloc: missing credential");
129 #endif /* DIAGNOSTIC */
130 if (fs->e2fs.e2fs_fbcount == 0)
131 goto nospace;
132 if (kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
133 NULL, NULL) != 0 &&
134 freespace(fs) <= 0)
135 goto nospace;
136 if (bpref >= fs->e2fs.e2fs_bcount)
137 bpref = 0;
138 if (bpref == 0)
139 cg = ino_to_cg(fs, ip->i_number);
140 else
141 cg = dtog(fs, bpref);
142 bno = (daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
143 ext2fs_alloccg);
144 if (bno > 0) {
145 ext2fs_setnblock(ip, ext2fs_nblock(ip) + btodb(fs->e2fs_bsize));
146 ip->i_flag |= IN_CHANGE | IN_UPDATE;
147 *bnp = bno;
148 return 0;
149 }
150 nospace:
151 ext2fs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
152 uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
153 return ENOSPC;
154 }
155
156 /*
157 * Allocate an inode in the file system.
158 *
159 * If allocating a directory, use ext2fs_dirpref to select the inode.
160 * If allocating in a directory, the following hierarchy is followed:
161 * 1) allocate the preferred inode.
162 * 2) allocate an inode in the same cylinder group.
163 * 3) quadradically rehash into other cylinder groups, until an
164 * available inode is located.
165 * If no inode preference is given the following hierarchy is used
166 * to allocate an inode:
167 * 1) allocate an inode in cylinder group 0.
168 * 2) quadradically rehash into other cylinder groups, until an
169 * available inode is located.
170 */
171 int
ext2fs_valloc(struct vnode * pvp,int mode,kauth_cred_t cred,ino_t * inop)172 ext2fs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, ino_t *inop)
173 {
174 struct inode *pip;
175 struct m_ext2fs *fs;
176 ino_t ino, ipref;
177 int cg;
178
179 pip = VTOI(pvp);
180 fs = pip->i_e2fs;
181 if (fs->e2fs.e2fs_ficount == 0)
182 goto noinodes;
183
184 if ((mode & IFMT) == IFDIR)
185 cg = ext2fs_dirpref(fs);
186 else
187 cg = ino_to_cg(fs, pip->i_number);
188 ipref = cg * fs->e2fs.e2fs_ipg + 1;
189 ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg);
190 if (ino == 0)
191 goto noinodes;
192
193 *inop = ino;
194 return 0;
195
196 noinodes:
197 ext2fs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
198 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
199 return ENOSPC;
200 }
201
202 /*
203 * Find a cylinder to place a directory.
204 *
205 * The policy implemented by this algorithm is to select from
206 * among those cylinder groups with above the average number of
207 * free inodes, the one with the smallest number of directories.
208 */
209 static u_long
ext2fs_dirpref(struct m_ext2fs * fs)210 ext2fs_dirpref(struct m_ext2fs *fs)
211 {
212 int cg, maxspace, mincg, avgifree;
213
214 avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
215 maxspace = 0;
216 mincg = -1;
217 for (cg = 0; cg < fs->e2fs_ncg; cg++) {
218 uint32_t nifree =
219 (fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree_hi) << 16)
220 | fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree);
221 if (nifree < avgifree) {
222 continue;
223 }
224 uint32_t nbfree =
225 (fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree_hi) << 16)
226 | fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree);
227 if (mincg == -1 || nbfree > maxspace) {
228 mincg = cg;
229 maxspace = nbfree;
230 }
231 }
232 return mincg;
233 }
234
235 /*
236 * Select the desired position for the next block in a file. The file is
237 * logically divided into sections. The first section is composed of the
238 * direct blocks. Each additional section contains fs_maxbpg blocks.
239 *
240 * If no blocks have been allocated in the first section, the policy is to
241 * request a block in the same cylinder group as the inode that describes
242 * the file. Otherwise, the policy is to try to allocate the blocks
243 * contiguously. The two fields of the ext2 inode extension (see
244 * ufs/ufs/inode.h) help this.
245 */
246 daddr_t
ext2fs_blkpref(struct inode * ip,daddr_t lbn,int indx,int32_t * bap)247 ext2fs_blkpref(struct inode *ip, daddr_t lbn, int indx,
248 int32_t *bap /* XXX ondisk32 */)
249 {
250 struct m_ext2fs *fs;
251 int cg, i;
252
253 fs = ip->i_e2fs;
254 /*
255 * if we are doing contiguous lbn allocation, try to alloc blocks
256 * contiguously on disk
257 */
258
259 if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) {
260 return ip->i_e2fs_last_blk + 1;
261 }
262
263 /*
264 * bap, if provided, gives us a list of blocks to which we want to
265 * stay close
266 */
267
268 if (bap) {
269 for (i = indx; i >= 0 ; i--) {
270 if (bap[i]) {
271 return fs2h32(bap[i]) + 1;
272 }
273 }
274 }
275
276 /* fall back to the first block of the cylinder containing the inode */
277
278 cg = ino_to_cg(fs, ip->i_number);
279 return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1;
280 }
281
282 /*
283 * Implement the cylinder overflow algorithm.
284 *
285 * The policy implemented by this algorithm is:
286 * 1) allocate the block in its requested cylinder group.
287 * 2) quadradically rehash on the cylinder group number.
288 * 3) brute force search for a free block.
289 */
290 static u_long
ext2fs_hashalloc(struct inode * ip,int cg,long pref,int size,daddr_t (* allocator)(struct inode *,int,daddr_t,int))291 ext2fs_hashalloc(struct inode *ip, int cg, long pref, int size,
292 daddr_t (*allocator)(struct inode *, int, daddr_t, int))
293 {
294 struct m_ext2fs *fs;
295 long result;
296 int i, icg = cg;
297
298 fs = ip->i_e2fs;
299 /*
300 * 1: preferred cylinder group
301 */
302 result = (*allocator)(ip, cg, pref, size);
303 if (result)
304 return result;
305 /*
306 * 2: quadratic rehash
307 */
308 for (i = 1; i < fs->e2fs_ncg; i *= 2) {
309 cg += i;
310 if (cg >= fs->e2fs_ncg)
311 cg -= fs->e2fs_ncg;
312 result = (*allocator)(ip, cg, 0, size);
313 if (result)
314 return result;
315 }
316 /*
317 * 3: brute force search
318 * Note that we start at i == 2, since 0 was checked initially,
319 * and 1 is always checked in the quadratic rehash.
320 */
321 cg = (icg + 2) % fs->e2fs_ncg;
322 for (i = 2; i < fs->e2fs_ncg; i++) {
323 result = (*allocator)(ip, cg, 0, size);
324 if (result)
325 return result;
326 cg++;
327 if (cg == fs->e2fs_ncg)
328 cg = 0;
329 }
330 return 0;
331 }
332
333 /*
334 * Determine whether a block can be allocated.
335 *
336 * Check to see if a block of the appropriate size is available,
337 * and if it is, allocate it.
338 */
339
340 static daddr_t
ext2fs_alloccg(struct inode * ip,int cg,daddr_t bpref,int size)341 ext2fs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
342 {
343 struct m_ext2fs *fs;
344 char *bbp;
345 struct buf *bp;
346 int error, bno, start, end, loc;
347
348 fs = ip->i_e2fs;
349 if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0 &&
350 fs->e2fs_gd[cg].ext2bgd_nbfree_hi == 0)
351 return 0;
352 error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
353 fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap),
354 fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
355 (int)fs->e2fs_bsize, B_MODIFY, &bp);
356 if (error) {
357 return 0;
358 }
359 bbp = (char *)bp->b_data;
360
361 if (dtog(fs, bpref) != cg)
362 bpref = 0;
363
364 /* initialize block bitmap now if uninit */
365 if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
366 (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)))) {
367 ext2fs_init_bb(fs, cg, &fs->e2fs_gd[cg], bbp);
368 fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_BLOCK_UNINIT);
369 }
370
371 if (bpref != 0) {
372 bpref = dtogd(fs, bpref);
373 /*
374 * if the requested block is available, use it
375 */
376 if (isclr(bbp, bpref)) {
377 bno = bpref;
378 goto gotit;
379 }
380 }
381 /*
382 * no blocks in the requested cylinder, so take next
383 * available one in this cylinder group.
384 * first try to get 8 contiguous blocks, then fall back to a single
385 * block.
386 */
387 if (bpref)
388 start = dtogd(fs, bpref) / NBBY;
389 else
390 start = 0;
391 end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
392 for (loc = start; loc < end; loc++) {
393 if (bbp[loc] == 0) {
394 bno = loc * NBBY;
395 goto gotit;
396 }
397 }
398 for (loc = 0; loc < start; loc++) {
399 if (bbp[loc] == 0) {
400 bno = loc * NBBY;
401 goto gotit;
402 }
403 }
404
405 bno = ext2fs_mapsearch(fs, bbp, bpref);
406 #if 0
407 /*
408 * XXX jdolecek mapsearch actually never fails, it panics instead.
409 * If re-enabling, make sure to brele() before returning.
410 */
411 if (bno < 0)
412 return 0;
413 #endif
414 gotit:
415 #ifdef DIAGNOSTIC
416 if (isset(bbp, (daddr_t)bno)) {
417 printf("%s: cg=%d bno=%d fs=%s\n", __func__,
418 cg, bno, fs->e2fs_fsmnt);
419 panic("ext2fs_alloccg: dup alloc");
420 }
421 #endif
422 setbit(bbp, (daddr_t)bno);
423 fs->e2fs.e2fs_fbcount--;
424 ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], -1, 0, 0, 0);
425 fs->e2fs_fmod = 1;
426 bdwrite(bp);
427 return cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno;
428 }
429
430 /*
431 * Determine whether an inode can be allocated.
432 *
433 * Check to see if an inode is available, and if it is,
434 * allocate it using the following policy:
435 * 1) allocate the requested inode.
436 * 2) allocate the next available inode after the requested
437 * inode in the specified cylinder group.
438 */
439 static daddr_t
ext2fs_nodealloccg(struct inode * ip,int cg,daddr_t ipref,int mode)440 ext2fs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
441 {
442 struct m_ext2fs *fs;
443 char *ibp;
444 struct buf *bp;
445 int error, start, len, loc, map, i;
446
447 ipref--; /* to avoid a lot of (ipref -1) */
448 if (ipref == -1)
449 ipref = 0;
450 fs = ip->i_e2fs;
451 if (fs->e2fs_gd[cg].ext2bgd_nifree == 0 &&
452 fs->e2fs_gd[cg].ext2bgd_nifree_hi == 0)
453 return 0;
454 error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
455 fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap),
456 fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
457 (int)fs->e2fs_bsize, B_MODIFY, &bp);
458 if (error) {
459 return 0;
460 }
461 ibp = (char *)bp->b_data;
462
463 KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0);
464
465 /* initialize inode bitmap now if uninit */
466 if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
467 (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)))) {
468 KASSERT(fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree) == fs->e2fs.e2fs_ipg);
469 memset(ibp, 0, fs->e2fs_bsize);
470 fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_INODE_UNINIT);
471 }
472
473 if (ipref) {
474 ipref %= fs->e2fs.e2fs_ipg;
475 if (isclr(ibp, ipref))
476 goto gotit;
477 }
478 start = ipref / NBBY;
479 len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY);
480 loc = skpc(0xff, len, &ibp[start]);
481 if (loc == 0) {
482 len = start + 1;
483 start = 0;
484 loc = skpc(0xff, len, &ibp[0]);
485 if (loc == 0) {
486 printf("%s: cg = %d, ipref = %lld, fs = %s\n", __func__,
487 cg, (long long)ipref, fs->e2fs_fsmnt);
488 panic("%s: map corrupted", __func__);
489 /* NOTREACHED */
490 }
491 }
492 i = start + len - loc;
493 map = ibp[i] ^ 0xff;
494 if (map == 0) {
495 printf("%s: fs = %s\n", __func__, fs->e2fs_fsmnt);
496 panic("%s: inode not in map", __func__);
497 }
498 ipref = i * NBBY + ffs(map) - 1;
499 gotit:
500 setbit(ibp, ipref);
501 fs->e2fs.e2fs_ficount--;
502 ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
503 0, -1, ((mode & IFMT) == IFDIR) ? 1 : 0, ipref);
504 fs->e2fs_fmod = 1;
505 bdwrite(bp);
506 return cg * fs->e2fs.e2fs_ipg + ipref + 1;
507 }
508
509 /*
510 * Free a block.
511 *
512 * The specified block is placed back in the
513 * free map.
514 */
515 void
ext2fs_blkfree(struct inode * ip,daddr_t bno)516 ext2fs_blkfree(struct inode *ip, daddr_t bno)
517 {
518 struct m_ext2fs *fs;
519 char *bbp;
520 struct buf *bp;
521 int error, cg;
522
523 fs = ip->i_e2fs;
524 cg = dtog(fs, bno);
525
526 KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
527 (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)) == 0);
528
529 if ((u_int)bno >= fs->e2fs.e2fs_bcount) {
530 printf("%s: bad block %jd, ino %ju\n",
531 __func__, (intmax_t)bno, (uintmax_t)ip->i_number);
532 ext2fs_fserr(fs, ip->i_uid, "bad block");
533 return;
534 }
535 error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
536 fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap),
537 fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
538 (int)fs->e2fs_bsize, B_MODIFY, &bp);
539 if (error) {
540 return;
541 }
542 bbp = (char *)bp->b_data;
543 bno = dtogd(fs, bno);
544 if (isclr(bbp, bno)) {
545 printf("%s: dev = %#jx, block = %jd, fs = %s\n", __func__,
546 (uintmax_t)ip->i_dev, (intmax_t)bno,
547 fs->e2fs_fsmnt);
548 panic("%s: freeing free block", __func__);
549 }
550 clrbit(bbp, bno);
551 fs->e2fs.e2fs_fbcount++;
552 ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], 1, 0, 0, 0);
553 fs->e2fs_fmod = 1;
554 bdwrite(bp);
555 }
556
557 /*
558 * Free an inode.
559 *
560 * The specified inode is placed back in the free map.
561 */
562 int
ext2fs_vfree(struct vnode * pvp,ino_t ino,int mode)563 ext2fs_vfree(struct vnode *pvp, ino_t ino, int mode)
564 {
565 struct m_ext2fs *fs;
566 char *ibp;
567 struct inode *pip;
568 struct buf *bp;
569 int error, cg;
570
571 pip = VTOI(pvp);
572 fs = pip->i_e2fs;
573
574 if ((u_int)ino > fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO)
575 panic("%s: range: dev = %#jx, ino = %ju, fs = %s",
576 __func__, (uintmax_t)pip->i_dev, (uintmax_t)ino,
577 fs->e2fs_fsmnt);
578
579 cg = ino_to_cg(fs, ino);
580
581 KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
582 (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)) == 0);
583
584 error = bread(pip->i_devvp, EXT2_FSBTODB64(fs,
585 fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap),
586 fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
587 (int)fs->e2fs_bsize, B_MODIFY, &bp);
588 if (error) {
589 return 0;
590 }
591 ibp = (char *)bp->b_data;
592 ino = (ino - 1) % fs->e2fs.e2fs_ipg;
593 if (isclr(ibp, ino)) {
594 printf("%s: dev = %#jx, ino = %ju, fs = %s\n", __func__,
595 (uintmax_t)pip->i_dev,
596 (uintmax_t)ino, fs->e2fs_fsmnt);
597 if (fs->e2fs_ronly == 0)
598 panic("%s: freeing free inode", __func__);
599 }
600 clrbit(ibp, ino);
601 fs->e2fs.e2fs_ficount++;
602 ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
603 0, 1, ((mode & IFMT) == IFDIR) ? -1 : 0, 0);
604 fs->e2fs_fmod = 1;
605 bdwrite(bp);
606 return 0;
607 }
608
609 /*
610 * Find a block in the specified cylinder group.
611 *
612 * It is a panic if a request is made to find a block if none are
613 * available.
614 */
615
616 static daddr_t
ext2fs_mapsearch(struct m_ext2fs * fs,char * bbp,daddr_t bpref)617 ext2fs_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
618 {
619 int start, len, loc, i, map;
620
621 /*
622 * find the fragment by searching through the free block
623 * map for an appropriate bit pattern
624 */
625 if (bpref)
626 start = dtogd(fs, bpref) / NBBY;
627 else
628 start = 0;
629 len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
630 loc = skpc(0xff, len, &bbp[start]);
631 if (loc == 0) {
632 len = start + 1;
633 start = 0;
634 loc = skpc(0xff, len, &bbp[start]);
635 if (loc == 0) {
636 printf("%s: start = %d, len = %d, fs = %s\n",
637 __func__, start, len, fs->e2fs_fsmnt);
638 panic("%s: map corrupted", __func__);
639 /* NOTREACHED */
640 }
641 }
642 i = start + len - loc;
643 map = bbp[i] ^ 0xff;
644 if (map == 0) {
645 printf("%s: fs = %s\n", __func__, fs->e2fs_fsmnt);
646 panic("%s: block not in map", __func__);
647 }
648 return i * NBBY + ffs(map) - 1;
649 }
650
651 /*
652 * Fserr prints the name of a file system with an error diagnostic.
653 *
654 * The form of the error message is:
655 * fs: error message
656 */
657 static void
ext2fs_fserr(struct m_ext2fs * fs,u_int uid,const char * cp)658 ext2fs_fserr(struct m_ext2fs *fs, u_int uid, const char *cp)
659 {
660
661 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
662 }
663
664 static __inline void
ext2fs_cg_update(struct m_ext2fs * fs,int cg,struct ext2_gd * gd,int nbfree,int nifree,int ndirs,daddr_t ioff)665 ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, int nifree, int ndirs, daddr_t ioff)
666 {
667 if (nifree) {
668 uint32_t ext2bgd_nifree = fs2h16(gd->ext2bgd_nifree) |
669 (fs2h16(gd->ext2bgd_nifree_hi) << 16);
670 ext2bgd_nifree += nifree;
671 gd->ext2bgd_nifree = h2fs16(ext2bgd_nifree);
672 gd->ext2bgd_nifree_hi = h2fs16(ext2bgd_nifree >> 16);
673 /*
674 * If we allocated inode on bigger offset than what was
675 * ever used before, bump the itable_unused count. This
676 * member only ever grows, and is used only for initialization
677 * !INODE_ZEROED groups with used inodes. Of course, by the
678 * time we get here the itables are already zeroed, but
679 * e2fstools fsck.ext4 still checks this.
680 */
681 if (E2FS_HAS_GD_CSUM(fs) && nifree < 0 &&
682 (ioff + 1) >= (fs->e2fs.e2fs_ipg -
683 fs2h16(gd->ext2bgd_itable_unused_lo))) {
684 gd->ext2bgd_itable_unused_lo =
685 h2fs16(fs->e2fs.e2fs_ipg - (ioff + 1));
686 }
687
688 KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
689 gd->ext2bgd_itable_unused_lo <= ext2bgd_nifree);
690 }
691
692
693 if (nbfree) {
694 uint32_t ext2bgd_nbfree = fs2h16(gd->ext2bgd_nbfree)
695 | (fs2h16(gd->ext2bgd_nbfree_hi) << 16);
696 ext2bgd_nbfree += nbfree;
697 gd->ext2bgd_nbfree = h2fs16(ext2bgd_nbfree);
698 gd->ext2bgd_nbfree_hi = h2fs16(ext2bgd_nbfree >> 16);
699 }
700
701 if (ndirs) {
702 uint32_t ext2bgd_ndirs = fs2h16(gd->ext2bgd_ndirs)
703 | (fs2h16(gd->ext2bgd_ndirs_hi) << 16);
704 ext2bgd_ndirs += ndirs;
705 gd->ext2bgd_ndirs = h2fs16(ext2bgd_ndirs);
706 gd->ext2bgd_ndirs_hi = h2fs16(ext2bgd_ndirs >> 16);
707 }
708
709 if (E2FS_HAS_GD_CSUM(fs))
710 gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
711 }
712
713 static const uint32_t ext2fs_crc32c_table[256] = {
714 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
715 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
716 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
717 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
718 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
719 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
720 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
721 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
722 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
723 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
724 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
725 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
726 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
727 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
728 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
729 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
730 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
731 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
732 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
733 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
734 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
735 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
736 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
737 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
738 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
739 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
740 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
741 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
742 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
743 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
744 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
745 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
746 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
747 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
748 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
749 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
750 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
751 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
752 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
753 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
754 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
755 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
756 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
757 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
758 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
759 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
760 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
761 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
762 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
763 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
764 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
765 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
766 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
767 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
768 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
769 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
770 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
771 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
772 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
773 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
774 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
775 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
776 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
777 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
778 };
779
780 static uint32_t
ext2fs_crc32c(uint32_t last,const void * vbuf,size_t len)781 ext2fs_crc32c(uint32_t last, const void *vbuf, size_t len)
782 {
783 uint32_t crc = last;
784 const uint8_t *buf = vbuf;
785
786 while (len--)
787 crc = ext2fs_crc32c_table[(crc ^ *buf++) & 0xff] ^ (crc >> 8);
788
789 return crc;
790 }
791
792 /*
793 * Compute group description csum. Structure data must be LE (not host).
794 * Returned as LE (disk encoding).
795 */
796 static uint16_t
ext2fs_cg_get_csum(struct m_ext2fs * fs,int cg,struct ext2_gd * gd)797 ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd)
798 {
799 uint16_t crc;
800 size_t cgsize = 1 << fs->e2fs_group_desc_shift;
801 uint32_t cg_bswapped = h2fs32((uint32_t)cg);
802 size_t off;
803
804 if (EXT2F_HAS_ROCOMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
805 uint32_t crc32;
806 uint8_t dummy[2] = {0, 0};
807
808 off = offsetof(struct ext2_gd, ext2bgd_checksum);
809
810 crc32 = ext2fs_crc32c(~0, fs->e2fs.e2fs_uuid,
811 sizeof(fs->e2fs.e2fs_uuid));
812 crc32 = ext2fs_crc32c(crc32, &cg_bswapped, sizeof(cg_bswapped));
813 crc32 = ext2fs_crc32c(crc32, gd, off);
814 crc32 = ext2fs_crc32c(crc32, dummy, 2);
815 crc32 = ext2fs_crc32c(crc32, gd + off + 2, cgsize - (off + 2));
816
817 return h2fs16(crc32 & 0xffff);
818 }
819
820 if (!EXT2F_HAS_ROCOMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM))
821 return 0;
822
823 off = offsetof(struct ext2_gd, ext2bgd_checksum);
824
825 crc = crc16(~0, (uint8_t *)fs->e2fs.e2fs_uuid,
826 sizeof(fs->e2fs.e2fs_uuid));
827 crc = crc16(crc, (uint8_t *)&cg_bswapped, sizeof(cg_bswapped));
828 crc = crc16(crc, (uint8_t *)gd, off);
829 crc = crc16(crc, (uint8_t *)gd + off + 2, cgsize - (off + 2));
830
831 return h2fs16(crc);
832 }
833
834 static void
ext2fs_init_bb(struct m_ext2fs * fs,int cg,struct ext2_gd * gd,char * bbp)835 ext2fs_init_bb(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, char *bbp)
836 {
837 int i;
838
839 memset(bbp, 0, fs->e2fs_bsize);
840
841 /*
842 * No block was ever allocated on this cg before, so the only used
843 * blocks are metadata blocks on start of the group. We could optimize
844 * this to set by bytes, but since this is done once per the group
845 * in lifetime of filesystem, it really is not worth it.
846 */
847 for(i=0; i < fs->e2fs.e2fs_bpg - fs2h16(gd->ext2bgd_nbfree); i++)
848 setbit(bbp, i);
849 }
850
851 /*
852 * Verify csum and initialize itable if not done already
853 */
854 int
ext2fs_cg_verify_and_initialize(struct vnode * devvp,struct m_ext2fs * fs,int ronly)855 ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ronly)
856 {
857 struct ext2_gd *gd;
858 ino_t ioff;
859 size_t boff;
860 struct buf *bp;
861 int cg, i, error;
862
863 if (!E2FS_HAS_GD_CSUM(fs))
864 return 0;
865
866 for(cg = 0; cg < fs->e2fs_ncg; cg++) {
867 gd = &fs->e2fs_gd[cg];
868
869 /* Verify checksum */
870 uint16_t csum = ext2fs_cg_get_csum(fs, cg, gd);
871 if (gd->ext2bgd_checksum != csum) {
872 printf("%s: group %d invalid csum (%#x != %#x)\n",
873 __func__, cg, gd->ext2bgd_checksum, csum);
874 return EINVAL;
875 }
876
877 /* if mounting read-write, zero itable if not already done */
878 if (ronly ||
879 (gd->ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0)
880 continue;
881
882 /*
883 * We are skipping already used inodes, zero rest of itable
884 * blocks. First block to zero could be only partial wipe, all
885 * others are wiped completely. This might take a while,
886 * there could be many inode table blocks. We use
887 * delayed writes, so this shouldn't block for very
888 * long.
889 */
890 ioff = fs->e2fs.e2fs_ipg - fs2h16(gd->ext2bgd_itable_unused_lo);
891 boff = (ioff % fs->e2fs_ipb) * EXT2_DINODE_SIZE(fs);
892
893 for(i = ioff / fs->e2fs_ipb; i < fs->e2fs_itpg; i++) {
894 if (boff) {
895 /* partial wipe, must read old data */
896 error = bread(devvp, EXT2_FSBTODB64OFF(fs,
897 fs2h32(gd->ext2bgd_i_tables),
898 fs2h32(gd->ext2bgd_i_tables_hi), i),
899 (int)fs->e2fs_bsize, B_MODIFY, &bp);
900 if (error) {
901 printf("%s: can't read itable block",
902 __func__);
903 return error;
904 }
905 memset((char *)bp->b_data + boff, 0,
906 fs->e2fs_bsize - boff);
907 boff = 0;
908 } else {
909 /*
910 * Complete wipe, don't need to read data. This
911 * assumes nothing else is changing the data.
912 */
913 bp = getblk(devvp, EXT2_FSBTODB64OFF(fs,
914 fs2h32(gd->ext2bgd_i_tables),
915 fs2h32(gd->ext2bgd_i_tables_hi), i),
916 (int)fs->e2fs_bsize, 0, 0);
917 clrbuf(bp);
918 }
919
920 bdwrite(bp);
921 }
922
923 gd->ext2bgd_flags |= h2fs16(E2FS_BG_INODE_ZEROED);
924 gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
925 fs->e2fs_fmod = 1;
926 }
927
928 return 0;
929 }
930