xref: /openbsd-src/sys/ufs/ext2fs/ext2fs_alloc.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$OpenBSD: ext2fs_alloc.c,v 1.33 2014/07/14 08:54:13 pelikan Exp $	*/
2 /*	$NetBSD: ext2fs_alloc.c,v 1.10 2001/07/05 08:38:27 toshii Exp $	*/
3 
4 /*
5  * Copyright (c) 1997 Manuel Bouyer.
6  * Copyright (c) 1982, 1986, 1989, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_alloc.c	8.11 (Berkeley) 10/27/94
34  *  Modified for ext2fs by Manuel Bouyer.
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/buf.h>
40 #include <sys/proc.h>
41 #include <sys/vnode.h>
42 #include <sys/mount.h>
43 #include <sys/kernel.h>
44 #include <sys/syslog.h>
45 
46 #include <ufs/ufs/quota.h>
47 #include <ufs/ufs/inode.h>
48 #include <ufs/ufs/ufsmount.h>
49 #include <ufs/ufs/ufs_extern.h>
50 
51 #include <ufs/ext2fs/ext2fs.h>
52 #include <ufs/ext2fs/ext2fs_extern.h>
53 
54 u_long ext2gennumber;
55 
56 static u_int32_t	ext2fs_alloccg(struct inode *, int, u_int32_t, int);
57 static int		ext2fs_dirpref(struct m_ext2fs *);
58 static void		ext2fs_fserr(struct m_ext2fs *, uid_t, char *);
59 static u_int32_t	ext2fs_hashalloc(struct inode *, int, u_int32_t, int,
60 			    u_int32_t (*)(struct inode *, int, u_int32_t, int));
61 static ufsino_t		ext2fs_nodealloccg(struct inode *, int, ufsino_t, int);
62 static u_int32_t	ext2fs_mapsearch(struct m_ext2fs *, char *, u_int32_t);
63 
64 /*
65  * Allocate a block in the file system.
66  *
67  * A preference may be optionally specified. If a preference is given
68  * the following hierarchy is used to allocate a block:
69  *   1) allocate the requested block.
70  *   2) allocate a rotationally optimal block in the same cylinder.
71  *   3) allocate a block in the same cylinder group.
72  *   4) quadratically rehash into other cylinder groups, until an
73  *	  available block is located.
74  * If no block preference is given the following hierarchy is used
75  * to allocate a block:
76  *   1) allocate a block in the cylinder group that contains the
77  *	  inode for the file.
78  *   2) quadratically rehash into other cylinder groups, until an
79  *	  available block is located.
80  */
81 int
82 ext2fs_alloc(struct inode *ip, u_int32_t lbn, u_int32_t bpref,
83     struct ucred *cred, u_int32_t *bnp)
84 {
85 	struct m_ext2fs *fs;
86 	u_int32_t bno;
87 	int cg;
88 
89 	*bnp = 0;
90 	fs = ip->i_e2fs;
91 #ifdef DIAGNOSTIC
92 	if (cred == NOCRED)
93 		panic("ext2fs_alloc: missing credential");
94 #endif /* DIAGNOSTIC */
95 	if (fs->e2fs.e2fs_fbcount == 0)
96 		goto nospace;
97 	if (cred->cr_uid != 0 && freespace(fs) <= 0)
98 		goto nospace;
99 	if (bpref >= fs->e2fs.e2fs_bcount)
100 		bpref = 0;
101 	if (bpref == 0)
102 		cg = ino_to_cg(fs, ip->i_number);
103 	else
104 		cg = dtog(fs, bpref);
105 	bno = ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize, ext2fs_alloccg);
106 	if (bno > 0) {
107 		ip->i_e2fs_nblock += btodb(fs->e2fs_bsize);
108 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
109 		*bnp = bno;
110 		return (0);
111 	}
112 nospace:
113 	ext2fs_fserr(fs, cred->cr_uid, "file system full");
114 	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
115 	return (ENOSPC);
116 }
117 
118 /*
119  * Allocate an inode in the file system.
120  *
121  * If allocating a directory, use ext2fs_dirpref to select the inode.
122  * If allocating in a directory, the following hierarchy is followed:
123  *   1) allocate the preferred inode.
124  *   2) allocate an inode in the same cylinder group.
125  *   3) quadratically rehash into other cylinder groups, until an
126  *	  available inode is located.
127  * If no inode preference is given the following hierarchy is used
128  * to allocate an inode:
129  *   1) allocate an inode in cylinder group 0.
130  *   2) quadratically rehash into other cylinder groups, until an
131  *	  available inode is located.
132  */
133 int
134 ext2fs_inode_alloc(struct inode *pip, mode_t mode, struct ucred *cred,
135     struct vnode **vpp)
136 {
137 	struct vnode *pvp;
138 	struct m_ext2fs *fs;
139 	struct inode *ip;
140 	ufsino_t ino, ipref;
141 	int cg, error;
142 
143 	*vpp = NULL;
144 	pvp = ITOV(pip);
145 	fs = pip->i_e2fs;
146 	if (fs->e2fs.e2fs_ficount == 0)
147 		goto noinodes;
148 
149 	if ((mode & IFMT) == IFDIR)
150 		cg = ext2fs_dirpref(fs);
151 	else
152 		cg = ino_to_cg(fs, pip->i_number);
153 	ipref = cg * fs->e2fs.e2fs_ipg + 1;
154 	ino = ext2fs_hashalloc(pip, cg, ipref, mode, ext2fs_nodealloccg);
155 	if (ino == 0)
156 		goto noinodes;
157 	error = VFS_VGET(pvp->v_mount, ino, vpp);
158 	if (error) {
159 		ext2fs_inode_free(pip, ino, mode);
160 		return (error);
161 	}
162 	ip = VTOI(*vpp);
163 	if (ip->i_e2fs_mode && ip->i_e2fs_nlink != 0) {
164 		printf("mode = 0%o, nlinks %u, inum = %u, fs = %s\n",
165 		    ip->i_e2fs_mode, ip->i_e2fs_nlink, ip->i_number,
166 		    fs->e2fs_fsmnt);
167 		panic("ext2fs_valloc: dup alloc");
168 	}
169 
170 	memset(ip->i_e2din, 0, sizeof(struct ext2fs_dinode));
171 
172 	/*
173 	 * Set up a new generation number for this inode.
174 	 */
175 	if (++ext2gennumber < (u_long)time_second)
176 		ext2gennumber = time_second;
177 	ip->i_e2fs_gen = ext2gennumber;
178 	return (0);
179  noinodes:
180 	ext2fs_fserr(fs, cred->cr_uid, "out of inodes");
181 	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
182 	return (ENOSPC);
183 }
184 
185 /*
186  * Find a cylinder to place a directory.
187  *
188  * The policy implemented by this algorithm is to select from
189  * among those cylinder groups with above the average number of
190  * free inodes, the one with the smallest number of directories.
191  */
192 static int
193 ext2fs_dirpref(struct m_ext2fs *fs)
194 {
195 	int cg, maxspace, mincg, avgifree;
196 
197 	avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
198 	maxspace = 0;
199 	mincg = -1;
200 	for (cg = 0; cg < fs->e2fs_ncg; cg++)
201 		if ( fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) {
202 			if (mincg == -1 || fs->e2fs_gd[cg].ext2bgd_nbfree > maxspace) {
203 				mincg = cg;
204 				maxspace = fs->e2fs_gd[cg].ext2bgd_nbfree;
205 			}
206 		}
207 	return mincg;
208 }
209 
210 /*
211  * Select the desired position for the next block in a file.  The file is
212  * logically divided into sections. The first section is composed of the
213  * direct blocks. Each additional section contains fs_maxbpg blocks.
214  *
215  * If no blocks have been allocated in the first section, the policy is to
216  * request a block in the same cylinder group as the inode that describes
217  * the file. Otherwise, the policy is to try to allocate the blocks
218  * contigously. The two fields of the ext2 inode extension (see
219  * ufs/ufs/inode.h) help this.
220  */
221 daddr_t
222 ext2fs_blkpref(struct inode *ip, u_int32_t lbn, int baps, u_int32_t *bap)
223 {
224 	struct m_ext2fs *fs;
225 	int cg, i;
226 
227 	fs = ip->i_e2fs;
228 	/*
229 	 * if we are doing contigous lbn allocation, try to alloc blocks
230 	 * contigously on disk
231 	 */
232 
233 	if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) {
234 		return ip->i_e2fs_last_blk + 1;
235 	}
236 
237 	/*
238 	 * bap, if provided, gives us a list of blocks to which we want to
239 	 * stay close
240 	 */
241 
242 	if (bap) {
243 		for (i = baps; i >= 0 ; i--) {
244 			if (bap[i]) {
245 				return letoh32(bap[i]) + 1;
246 			}
247 		}
248 	}
249 
250 	/* fall back to the first block of the cylinder containing the inode */
251 
252 	cg = ino_to_cg(fs, ip->i_number);
253 	return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1;
254 }
255 
256 /*
257  * Implement the cylinder overflow algorithm.
258  *
259  * The policy implemented by this algorithm is:
260  *   1) allocate the block in its requested cylinder group.
261  *   2) quadratically rehash on the cylinder group number.
262  *   3) brute force search for a free block.
263  */
264 static u_int32_t
265 ext2fs_hashalloc(struct inode *ip, int cg, u_int32_t pref, int size,
266     u_int32_t (*allocator)(struct inode *, int, u_int32_t, int))
267 {
268 	struct m_ext2fs *fs;
269 	long result;
270 	int i, icg = cg;
271 
272 	fs = ip->i_e2fs;
273 	/*
274 	 * 1: preferred cylinder group
275 	 */
276 	result = (*allocator)(ip, cg, pref, size);
277 	if (result)
278 		return (result);
279 	/*
280 	 * 2: quadratic rehash
281 	 */
282 	for (i = 1; i < fs->e2fs_ncg; i *= 2) {
283 		cg += i;
284 		if (cg >= fs->e2fs_ncg)
285 			cg -= fs->e2fs_ncg;
286 		result = (*allocator)(ip, cg, 0, size);
287 		if (result)
288 			return (result);
289 	}
290 	/*
291 	 * 3: brute force search
292 	 * Note that we start at i == 2, since 0 was checked initially,
293 	 * and 1 is always checked in the quadratic rehash.
294 	 */
295 	cg = (icg + 2) % fs->e2fs_ncg;
296 	for (i = 2; i < fs->e2fs_ncg; i++) {
297 		result = (*allocator)(ip, cg, 0, size);
298 		if (result)
299 			return (result);
300 		cg++;
301 		if (cg == fs->e2fs_ncg)
302 			cg = 0;
303 	}
304 	return (0);
305 }
306 
307 /*
308  * Determine whether a block can be allocated.
309  *
310  * Check to see if a block of the appropriate size is available,
311  * and if it is, allocate it.
312  */
313 static u_int32_t
314 ext2fs_alloccg(struct inode *ip, int cg, u_int32_t bpref, int size)
315 {
316 	struct m_ext2fs *fs;
317 	char *bbp;
318 	struct buf *bp;
319 	u_int32_t bno;
320 	int error, start, end, loc;
321 
322 	fs = ip->i_e2fs;
323 	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
324 		return (0);
325 	error = bread(ip->i_devvp, fsbtodb(fs,
326 	    fs->e2fs_gd[cg].ext2bgd_b_bitmap), (int)fs->e2fs_bsize, &bp);
327 	if (error || fs->e2fs_gd[cg].ext2bgd_nbfree == 0) {
328 		brelse(bp);
329 		return (0);
330 	}
331 	bbp = (char *)bp->b_data;
332 
333 	if (dtog(fs, bpref) != cg)
334 		bpref = 0;
335 	if (bpref != 0) {
336 		bpref = dtogd(fs, bpref);
337 		/*
338 		 * if the requested block is available, use it
339 		 */
340 		if (isclr(bbp, bpref)) {
341 			bno = bpref;
342 			goto gotit;
343 		}
344 	}
345 	/*
346 	 * no blocks in the requested cylinder, so take next
347 	 * available one in this cylinder group.
348 	 * first try to get 8 contigous blocks, then fall back to a single
349 	 * block.
350 	 */
351 	if (bpref)
352 		start = dtogd(fs, bpref) / NBBY;
353 	else
354 		start = 0;
355 	end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
356 	for (loc = start; loc < end; loc++) {
357 		if (bbp[loc] == 0) {
358 			bno = loc * NBBY;
359 			goto gotit;
360 		}
361 	}
362 	for (loc = 0; loc < start; loc++) {
363 		if (bbp[loc] == 0) {
364 			bno = loc * NBBY;
365 			goto gotit;
366 		}
367 	}
368 
369 	bno = ext2fs_mapsearch(fs, bbp, bpref);
370 	if (bno < 0)
371 		return (0);
372  gotit:
373 #ifdef DIAGNOSTIC
374 	if (isset(bbp, bno)) {
375 		panic("%s: dup alloc: cg=%d bno=%u fs=%s\n",
376 		    __func__, cg, bno, fs->e2fs_fsmnt);
377 	}
378 #endif
379 	setbit(bbp, bno);
380 	fs->e2fs.e2fs_fbcount--;
381 	fs->e2fs_gd[cg].ext2bgd_nbfree--;
382 	fs->e2fs_fmod = 1;
383 	bdwrite(bp);
384 	return (cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno);
385 }
386 
387 /*
388  * Determine whether an inode can be allocated.
389  *
390  * Check to see if an inode is available, and if it is,
391  * allocate it using the following policy:
392  *   1) allocate the requested inode.
393  *   2) allocate the next available inode after the requested
394  *	  inode in the specified cylinder group.
395  */
396 static ufsino_t
397 ext2fs_nodealloccg(struct inode *ip, int cg, ufsino_t ipref, int mode)
398 {
399 	struct m_ext2fs *fs;
400 	char *ibp;
401 	struct buf *bp;
402 	int error, start, len, loc, map, i;
403 
404 	ipref--; /* to avoid a lot of (ipref -1) */
405 	fs = ip->i_e2fs;
406 	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
407 		return (0);
408 	error = bread(ip->i_devvp, fsbtodb(fs,
409 	    fs->e2fs_gd[cg].ext2bgd_i_bitmap), (int)fs->e2fs_bsize, &bp);
410 	if (error) {
411 		brelse(bp);
412 		return (0);
413 	}
414 	ibp = (char *)bp->b_data;
415 	if (ipref) {
416 		ipref %= fs->e2fs.e2fs_ipg;
417 		if (isclr(ibp, ipref))
418 			goto gotit;
419 	}
420 	start = ipref / NBBY;
421 	len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY);
422 	loc = skpc(0xff, len, &ibp[start]);
423 	if (loc == 0) {
424 		len = start + 1;
425 		start = 0;
426 		loc = skpc(0xff, len, &ibp[0]);
427 		if (loc == 0) {
428 			printf("cg = %d, ipref = %u, fs = %s\n",
429 			    cg, ipref, fs->e2fs_fsmnt);
430 			panic("ext2fs_nodealloccg: map corrupted");
431 			/* NOTREACHED */
432 		}
433 	}
434 	i = start + len - loc;
435 	map = ibp[i];
436 	ipref = i * NBBY;
437 	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
438 		if ((map & i) == 0) {
439 			goto gotit;
440 		}
441 	}
442 	printf("fs = %s\n", fs->e2fs_fsmnt);
443 	panic("ext2fs_nodealloccg: block not in map");
444 	/* NOTREACHED */
445  gotit:
446 	setbit(ibp, ipref);
447 	fs->e2fs.e2fs_ficount--;
448 	fs->e2fs_gd[cg].ext2bgd_nifree--;
449 	fs->e2fs_fmod = 1;
450 	if ((mode & IFMT) == IFDIR) {
451 		fs->e2fs_gd[cg].ext2bgd_ndirs++;
452 	}
453 	bdwrite(bp);
454 	return (cg * fs->e2fs.e2fs_ipg + ipref + 1);
455 }
456 
457 /*
458  * Free a block.
459  *
460  * The specified block is placed back in the
461  * free map.
462  */
463 void
464 ext2fs_blkfree(struct inode *ip, u_int32_t bno)
465 {
466 	struct m_ext2fs *fs;
467 	char *bbp;
468 	struct buf *bp;
469 	int error, cg;
470 
471 	fs = ip->i_e2fs;
472 	cg = dtog(fs, bno);
473 	if (bno >= fs->e2fs.e2fs_bcount) {
474 		printf("bad block %u, ino %u\n", bno, ip->i_number);
475 		ext2fs_fserr(fs, ip->i_e2fs_uid, "bad block");
476 		return;
477 	}
478 	error = bread(ip->i_devvp,
479 	    fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
480 	    (int)fs->e2fs_bsize, &bp);
481 	if (error) {
482 		brelse(bp);
483 		return;
484 	}
485 	bbp = (char *)bp->b_data;
486 	bno = dtogd(fs, bno);
487 	if (isclr(bbp, bno))
488 		panic("%s: freeing free block: dev = 0x%x, block = %u, fs = %s\n",
489 		    __func__, ip->i_dev, bno, fs->e2fs_fsmnt);
490 
491 	clrbit(bbp, bno);
492 	fs->e2fs.e2fs_fbcount++;
493 	fs->e2fs_gd[cg].ext2bgd_nbfree++;
494 
495 	fs->e2fs_fmod = 1;
496 	bdwrite(bp);
497 }
498 
499 /*
500  * Free an inode.
501  *
502  * The specified inode is placed back in the free map.
503  */
504 void
505 ext2fs_inode_free(struct inode *pip, ufsino_t ino, mode_t mode)
506 {
507 	struct m_ext2fs *fs;
508 	char *ibp;
509 	struct buf *bp;
510 	int error, cg;
511 
512 	fs = pip->i_e2fs;
513 	if (ino > fs->e2fs.e2fs_icount || ino < EXT2_FIRSTINO)
514 		panic("ifree: range: dev = 0x%x, ino = %u, fs = %s",
515 		    pip->i_dev, ino, fs->e2fs_fsmnt);
516 	cg = ino_to_cg(fs, ino);
517 	error = bread(pip->i_devvp,
518 	    fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
519 	    (int)fs->e2fs_bsize, &bp);
520 	if (error) {
521 		brelse(bp);
522 		return;
523 	}
524 	ibp = (char *)bp->b_data;
525 	ino = (ino - 1) % fs->e2fs.e2fs_ipg;
526 	if (isclr(ibp, ino)) {
527 		printf("dev = 0x%x, ino = %d, fs = %s\n",
528 		    pip->i_dev, ino, fs->e2fs_fsmnt);
529 		if (fs->e2fs_ronly == 0)
530 			panic("ifree: freeing free inode");
531 	}
532 	clrbit(ibp, ino);
533 	fs->e2fs.e2fs_ficount++;
534 	fs->e2fs_gd[cg].ext2bgd_nifree++;
535 	if ((mode & IFMT) == IFDIR) {
536 		fs->e2fs_gd[cg].ext2bgd_ndirs--;
537 	}
538 	fs->e2fs_fmod = 1;
539 	bdwrite(bp);
540 }
541 
542 /*
543  * Find a block in the specified cylinder group.
544  *
545  * It is a panic if a request is made to find a block if none are
546  * available.
547  */
548 
549 static u_int32_t
550 ext2fs_mapsearch(struct m_ext2fs *fs, char *bbp, u_int32_t bpref)
551 {
552 	u_int32_t bno;
553 	int start, len, loc, i, map;
554 
555 	/*
556 	 * find the fragment by searching through the free block
557 	 * map for an appropriate bit pattern
558 	 */
559 	if (bpref)
560 		start = dtogd(fs, bpref) / NBBY;
561 	else
562 		start = 0;
563 	len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
564 	loc = skpc(0xff, len, &bbp[start]);
565 	if (loc == 0) {
566 		len = start + 1;
567 		start = 0;
568 		loc = skpc(0xff, len, &bbp[start]);
569 		if (loc == 0) {
570 			printf("start = %d, len = %d, fs = %s\n",
571 				start, len, fs->e2fs_fsmnt);
572 			panic("ext2fs_alloccg: map corrupted");
573 			/* NOTREACHED */
574 		}
575 	}
576 	i = start + len - loc;
577 	map = bbp[i];
578 	bno = i * NBBY;
579 	for (i = 1; i < (1 << NBBY); i <<= 1, bno++) {
580 		if ((map & i) == 0)
581 			return (bno);
582 	}
583 	printf("fs = %s\n", fs->e2fs_fsmnt);
584 	panic("ext2fs_mapsearch: block not in map");
585 	/* NOTREACHED */
586 }
587 
588 /*
589  * Fserr prints the name of a file system with an error diagnostic.
590  *
591  * The form of the error message is:
592  *	fs: error message
593  */
594 static void
595 ext2fs_fserr(struct m_ext2fs *fs, uid_t uid, char *cp)
596 {
597 	log(LOG_ERR, "uid %u on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
598 }
599