xref: /openbsd-src/sys/ufs/ext2fs/ext2fs_alloc.c (revision 2b0358df1d88d06ef4139321dd05bd5e05d91eaf)
1 /*	$OpenBSD: ext2fs_alloc.c,v 1.25 2008/01/05 19:49:26 otto Exp $	*/
2 /*	$NetBSD: ext2fs_alloc.c,v 1.10 2001/07/05 08:38:27 toshii Exp $	*/
3 
4 /*
5  * Copyright (c) 1997 Manuel Bouyer.
6  * Copyright (c) 1982, 1986, 1989, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_alloc.c	8.11 (Berkeley) 10/27/94
34  *  Modified for ext2fs by Manuel Bouyer.
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/buf.h>
40 #include <sys/proc.h>
41 #include <sys/vnode.h>
42 #include <sys/mount.h>
43 #include <sys/kernel.h>
44 #include <sys/syslog.h>
45 
46 #include <ufs/ufs/quota.h>
47 #include <ufs/ufs/inode.h>
48 #include <ufs/ufs/ufsmount.h>
49 #include <ufs/ufs/ufs_extern.h>
50 
51 #include <ufs/ext2fs/ext2fs.h>
52 #include <ufs/ext2fs/ext2fs_extern.h>
53 
54 u_long ext2gennumber;
55 
56 static int32_t	ext2fs_alloccg(struct inode *, int, int32_t, int);
57 static u_long	ext2fs_dirpref(struct m_ext2fs *);
58 static void	ext2fs_fserr(struct m_ext2fs *, uid_t, char *);
59 static u_long	ext2fs_hashalloc(struct inode *, int, long, int,
60 		    int32_t (*)(struct inode *, int, int32_t, int));
61 static int32_t	ext2fs_nodealloccg(struct inode *, int, int32_t, int);
62 static int32_t	ext2fs_mapsearch(struct m_ext2fs *, char *, int32_t);
63 
64 /*
65  * Allocate a block in the file system.
66  *
67  * A preference may be optionally specified. If a preference is given
68  * the following hierarchy is used to allocate a block:
69  *   1) allocate the requested block.
70  *   2) allocate a rotationally optimal block in the same cylinder.
71  *   3) allocate a block in the same cylinder group.
72  *   4) quadratically rehash into other cylinder groups, until an
73  *	  available block is located.
74  * If no block preference is given the following hierarchy is used
75  * to allocate a block:
76  *   1) allocate a block in the cylinder group that contains the
77  *	  inode for the file.
78  *   2) quadratically rehash into other cylinder groups, until an
79  *	  available block is located.
80  */
81 int
82 ext2fs_alloc(struct inode *ip, int32_t lbn, int32_t bpref,
83     struct ucred *cred, int32_t *bnp)
84 {
85 	struct m_ext2fs *fs;
86 	int32_t bno;
87 	int cg;
88 
89 	*bnp = 0;
90 	fs = ip->i_e2fs;
91 #ifdef DIAGNOSTIC
92 	if (cred == NOCRED)
93 		panic("ext2fs_alloc: missing credential");
94 #endif /* DIAGNOSTIC */
95 	if (fs->e2fs.e2fs_fbcount == 0)
96 		goto nospace;
97 	if (cred->cr_uid != 0 && freespace(fs) <= 0)
98 		goto nospace;
99 	if (bpref >= fs->e2fs.e2fs_bcount)
100 		bpref = 0;
101 	if (bpref == 0)
102 		cg = ino_to_cg(fs, ip->i_number);
103 	else
104 		cg = dtog(fs, bpref);
105 	bno = (int32_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
106 						 ext2fs_alloccg);
107 	if (bno > 0) {
108 		ip->i_e2fs_nblock += btodb(fs->e2fs_bsize);
109 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
110 		*bnp = bno;
111 		return (0);
112 	}
113 nospace:
114 	ext2fs_fserr(fs, cred->cr_uid, "file system full");
115 	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
116 	return (ENOSPC);
117 }
118 
119 /*
120  * Allocate an inode in the file system.
121  *
122  * If allocating a directory, use ext2fs_dirpref to select the inode.
123  * If allocating in a directory, the following hierarchy is followed:
124  *   1) allocate the preferred inode.
125  *   2) allocate an inode in the same cylinder group.
126  *   3) quadratically rehash into other cylinder groups, until an
127  *	  available inode is located.
128  * If no inode preference is given the following hierarchy is used
129  * to allocate an inode:
130  *   1) allocate an inode in cylinder group 0.
131  *   2) quadratically rehash into other cylinder groups, until an
132  *	  available inode is located.
133  */
134 int
135 ext2fs_inode_alloc(struct inode *pip, mode_t mode, struct ucred *cred,
136     struct vnode **vpp)
137 {
138 	struct vnode *pvp;
139 	struct m_ext2fs *fs;
140 	struct inode *ip;
141 	ino_t ino, ipref;
142 	int cg, error;
143 
144 	*vpp = NULL;
145 	pvp = ITOV(pip);
146 	fs = pip->i_e2fs;
147 	if (fs->e2fs.e2fs_ficount == 0)
148 		goto noinodes;
149 
150 	if ((mode & IFMT) == IFDIR)
151 		cg = ext2fs_dirpref(fs);
152 	else
153 		cg = ino_to_cg(fs, pip->i_number);
154 	ipref = cg * fs->e2fs.e2fs_ipg + 1;
155 	ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg);
156 	if (ino == 0)
157 		goto noinodes;
158 	error = VFS_VGET(pvp->v_mount, ino, vpp);
159 	if (error) {
160 		ext2fs_inode_free(pip, ino, mode);
161 		return (error);
162 	}
163 	ip = VTOI(*vpp);
164 	if (ip->i_e2fs_mode && ip->i_e2fs_nlink != 0) {
165 		printf("mode = 0%o, nlinks %d, inum = %d, fs = %s\n",
166 			ip->i_e2fs_mode, ip->i_e2fs_nlink, ip->i_number, fs->e2fs_fsmnt);
167 		panic("ext2fs_valloc: dup alloc");
168 	}
169 
170 	bzero(ip->i_e2din, sizeof(struct ext2fs_dinode));
171 
172 	/*
173 	 * Set up a new generation number for this inode.
174 	 */
175 	if (++ext2gennumber < (u_long)time_second)
176 		ext2gennumber = time_second;
177 	ip->i_e2fs_gen = ext2gennumber;
178 	return (0);
179 noinodes:
180 	ext2fs_fserr(fs, cred->cr_uid, "out of inodes");
181 	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
182 	return (ENOSPC);
183 }
184 
185 /*
186  * Find a cylinder to place a directory.
187  *
188  * The policy implemented by this algorithm is to select from
189  * among those cylinder groups with above the average number of
190  * free inodes, the one with the smallest number of directories.
191  */
192 static u_long
193 ext2fs_dirpref(struct m_ext2fs *fs)
194 {
195 	int cg, maxspace, mincg, avgifree;
196 
197 	avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
198 	maxspace = 0;
199 	mincg = -1;
200 	for (cg = 0; cg < fs->e2fs_ncg; cg++)
201 		if ( fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) {
202 			if (mincg == -1 || fs->e2fs_gd[cg].ext2bgd_nbfree > maxspace) {
203 				mincg = cg;
204 				maxspace = fs->e2fs_gd[cg].ext2bgd_nbfree;
205 			}
206 		}
207 	return mincg;
208 }
209 
210 /*
211  * Select the desired position for the next block in a file.  The file is
212  * logically divided into sections. The first section is composed of the
213  * direct blocks. Each additional section contains fs_maxbpg blocks.
214  *
215  * If no blocks have been allocated in the first section, the policy is to
216  * request a block in the same cylinder group as the inode that describes
217  * the file. Otherwise, the policy is to try to allocate the blocks
218  * contigously. The two fields of the ext2 inode extension (see
219  * ufs/ufs/inode.h) help this.
220  */
221 daddr64_t
222 ext2fs_blkpref(struct inode *ip, int32_t lbn, int indx, int32_t *bap)
223 {
224 	struct m_ext2fs *fs;
225 	int cg, i;
226 
227 	fs = ip->i_e2fs;
228 	/*
229 	 * if we are doing contigous lbn allocation, try to alloc blocks
230 	 * contigously on disk
231 	 */
232 
233 	if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) {
234 		return ip->i_e2fs_last_blk + 1;
235 	}
236 
237 	/*
238 	 * bap, if provided, gives us a list of blocks to which we want to
239 	 * stay close
240 	 */
241 
242 	if (bap) {
243 		for (i = indx; i >= 0 ; i--) {
244 			if (bap[i]) {
245 				return fs2h32(bap[i]) + 1;
246 			}
247 		}
248 	}
249 
250 	/* fall back to the first block of the cylinder containing the inode */
251 
252 	cg = ino_to_cg(fs, ip->i_number);
253 	return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1;
254 }
255 
256 /*
257  * Implement the cylinder overflow algorithm.
258  *
259  * The policy implemented by this algorithm is:
260  *   1) allocate the block in its requested cylinder group.
261  *   2) quadratically rehash on the cylinder group number.
262  *   3) brute force search for a free block.
263  */
264 static u_long
265 ext2fs_hashalloc(struct inode *ip, int cg, long pref, int size,
266     int32_t (*allocator)(struct inode *, int, int32_t, int))
267 {
268 	struct m_ext2fs *fs;
269 	long result;
270 	int i, icg = cg;
271 
272 	fs = ip->i_e2fs;
273 	/*
274 	 * 1: preferred cylinder group
275 	 */
276 	result = (*allocator)(ip, cg, pref, size);
277 	if (result)
278 		return (result);
279 	/*
280 	 * 2: quadratic rehash
281 	 */
282 	for (i = 1; i < fs->e2fs_ncg; i *= 2) {
283 		cg += i;
284 		if (cg >= fs->e2fs_ncg)
285 			cg -= fs->e2fs_ncg;
286 		result = (*allocator)(ip, cg, 0, size);
287 		if (result)
288 			return (result);
289 	}
290 	/*
291 	 * 3: brute force search
292 	 * Note that we start at i == 2, since 0 was checked initially,
293 	 * and 1 is always checked in the quadratic rehash.
294 	 */
295 	cg = (icg + 2) % fs->e2fs_ncg;
296 	for (i = 2; i < fs->e2fs_ncg; i++) {
297 		result = (*allocator)(ip, cg, 0, size);
298 		if (result)
299 			return (result);
300 		cg++;
301 		if (cg == fs->e2fs_ncg)
302 			cg = 0;
303 	}
304 	return (0);
305 }
306 
307 /*
308  * Determine whether a block can be allocated.
309  *
310  * Check to see if a block of the appropriate size is available,
311  * and if it is, allocate it.
312  */
313 
314 static int32_t
315 ext2fs_alloccg(struct inode *ip, int cg, int32_t bpref, int size)
316 {
317 	struct m_ext2fs *fs;
318 	char *bbp;
319 	struct buf *bp;
320 	int error, bno, start, end, loc;
321 
322 	fs = ip->i_e2fs;
323 	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
324 		return (0);
325 	error = bread(ip->i_devvp, fsbtodb(fs,
326 		fs->e2fs_gd[cg].ext2bgd_b_bitmap),
327 		(int)fs->e2fs_bsize, NOCRED, &bp);
328 	if (error || fs->e2fs_gd[cg].ext2bgd_nbfree == 0) {
329 		brelse(bp);
330 		return (0);
331 	}
332 	bbp = (char *)bp->b_data;
333 
334 	if (dtog(fs, bpref) != cg)
335 		bpref = 0;
336 	if (bpref != 0) {
337 		bpref = dtogd(fs, bpref);
338 		/*
339 		 * if the requested block is available, use it
340 		 */
341 		if (isclr(bbp, bpref)) {
342 			bno = bpref;
343 			goto gotit;
344 		}
345 	}
346 	/*
347 	 * no blocks in the requested cylinder, so take next
348 	 * available one in this cylinder group.
349 	 * first try to get 8 contigous blocks, then fall back to a single
350 	 * block.
351 	 */
352 	if (bpref)
353 		start = dtogd(fs, bpref) / NBBY;
354 	else
355 		start = 0;
356 	end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
357 	for (loc = start; loc < end; loc++) {
358 		if (bbp[loc] == 0) {
359 			bno = loc * NBBY;
360 			goto gotit;
361 		}
362 	}
363 	for (loc = 0; loc < start; loc++) {
364 		if (bbp[loc] == 0) {
365 			bno = loc * NBBY;
366 			goto gotit;
367 		}
368 	}
369 
370 	bno = ext2fs_mapsearch(fs, bbp, bpref);
371 	if (bno < 0)
372 		return (0);
373 gotit:
374 #ifdef DIAGNOSTIC
375 	if (isset(bbp, (long)bno)) {
376 		printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n",
377 			cg, bno, fs->e2fs_fsmnt);
378 		panic("ext2fs_alloccg: dup alloc");
379 	}
380 #endif
381 	setbit(bbp, (long)bno);
382 	fs->e2fs.e2fs_fbcount--;
383 	fs->e2fs_gd[cg].ext2bgd_nbfree--;
384 	fs->e2fs_fmod = 1;
385 	bdwrite(bp);
386 	return (cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno);
387 }
388 
389 /*
390  * Determine whether an inode can be allocated.
391  *
392  * Check to see if an inode is available, and if it is,
393  * allocate it using the following policy:
394  *   1) allocate the requested inode.
395  *   2) allocate the next available inode after the requested
396  *	  inode in the specified cylinder group.
397  */
398 static int32_t
399 ext2fs_nodealloccg(struct inode *ip, int cg, int32_t ipref, int mode)
400 {
401 	struct m_ext2fs *fs;
402 	char *ibp;
403 	struct buf *bp;
404 	int error, start, len, loc, map, i;
405 
406 	ipref--; /* to avoid a lot of (ipref -1) */
407 	fs = ip->i_e2fs;
408 	if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
409 		return (0);
410 	error = bread(ip->i_devvp, fsbtodb(fs,
411 		fs->e2fs_gd[cg].ext2bgd_i_bitmap),
412 		(int)fs->e2fs_bsize, NOCRED, &bp);
413 	if (error) {
414 		brelse(bp);
415 		return (0);
416 	}
417 	ibp = (char *)bp->b_data;
418 	if (ipref) {
419 		ipref %= fs->e2fs.e2fs_ipg;
420 		if (isclr(ibp, ipref))
421 			goto gotit;
422 	}
423 	start = ipref / NBBY;
424 	len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY);
425 	loc = skpc(0xff, len, &ibp[start]);
426 	if (loc == 0) {
427 		len = start + 1;
428 		start = 0;
429 		loc = skpc(0xff, len, &ibp[0]);
430 		if (loc == 0) {
431 			printf("cg = %d, ipref = %d, fs = %s\n",
432 				cg, ipref, fs->e2fs_fsmnt);
433 			panic("ext2fs_nodealloccg: map corrupted");
434 			/* NOTREACHED */
435 		}
436 	}
437 	i = start + len - loc;
438 	map = ibp[i];
439 	ipref = i * NBBY;
440 	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
441 		if ((map & i) == 0) {
442 			goto gotit;
443 		}
444 	}
445 	printf("fs = %s\n", fs->e2fs_fsmnt);
446 	panic("ext2fs_nodealloccg: block not in map");
447 	/* NOTREACHED */
448 gotit:
449 	setbit(ibp, ipref);
450 	fs->e2fs.e2fs_ficount--;
451 	fs->e2fs_gd[cg].ext2bgd_nifree--;
452 	fs->e2fs_fmod = 1;
453 	if ((mode & IFMT) == IFDIR) {
454 		fs->e2fs_gd[cg].ext2bgd_ndirs++;
455 	}
456 	bdwrite(bp);
457 	return (cg * fs->e2fs.e2fs_ipg + ipref +1);
458 }
459 
460 /*
461  * Free a block.
462  *
463  * The specified block is placed back in the
464  * free map.
465  */
466 void
467 ext2fs_blkfree(struct inode *ip, int32_t bno)
468 {
469 	struct m_ext2fs *fs;
470 	char *bbp;
471 	struct buf *bp;
472 	int error, cg;
473 
474 	fs = ip->i_e2fs;
475 	cg = dtog(fs, bno);
476 	if ((u_int)bno >= fs->e2fs.e2fs_bcount) {
477 		printf("bad block %d, ino %d\n", bno, ip->i_number);
478 		ext2fs_fserr(fs, ip->i_e2fs_uid, "bad block");
479 		return;
480 	}
481 	error = bread(ip->i_devvp,
482 		fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
483 		(int)fs->e2fs_bsize, NOCRED, &bp);
484 	if (error) {
485 		brelse(bp);
486 		return;
487 	}
488 	bbp = (char *)bp->b_data;
489 	bno = dtogd(fs, bno);
490 	if (isclr(bbp, bno)) {
491 		printf("dev = 0x%x, block = %d, fs = %s\n",
492 			ip->i_dev, bno, fs->e2fs_fsmnt);
493 		panic("blkfree: freeing free block");
494 	}
495 	clrbit(bbp, bno);
496 	fs->e2fs.e2fs_fbcount++;
497 	fs->e2fs_gd[cg].ext2bgd_nbfree++;
498 
499 	fs->e2fs_fmod = 1;
500 	bdwrite(bp);
501 }
502 
503 /*
504  * Free an inode.
505  *
506  * The specified inode is placed back in the free map.
507  */
508 int
509 ext2fs_inode_free(struct inode *pip, ino_t ino, mode_t mode)
510 {
511 	struct m_ext2fs *fs;
512 	char *ibp;
513 	struct buf *bp;
514 	int error, cg;
515 
516 	fs = pip->i_e2fs;
517 	if ((u_int)ino > fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO)
518 		panic("ifree: range: dev = 0x%x, ino = %d, fs = %s",
519 			pip->i_dev, ino, fs->e2fs_fsmnt);
520 	cg = ino_to_cg(fs, ino);
521 	error = bread(pip->i_devvp,
522 	        fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
523 		(int)fs->e2fs_bsize, NOCRED, &bp);
524 	if (error) {
525 		brelse(bp);
526 		return (0);
527 	}
528 	ibp = (char *)bp->b_data;
529 	ino = (ino - 1) % fs->e2fs.e2fs_ipg;
530 	if (isclr(ibp, ino)) {
531 		printf("dev = 0x%x, ino = %d, fs = %s\n",
532 			pip->i_dev, ino, fs->e2fs_fsmnt);
533 		if (fs->e2fs_ronly == 0)
534 			panic("ifree: freeing free inode");
535 	}
536 	clrbit(ibp, ino);
537 	fs->e2fs.e2fs_ficount++;
538 	fs->e2fs_gd[cg].ext2bgd_nifree++;
539 	if ((mode & IFMT) == IFDIR) {
540 		fs->e2fs_gd[cg].ext2bgd_ndirs--;
541 	}
542 	fs->e2fs_fmod = 1;
543 	bdwrite(bp);
544 	return (0);
545 }
546 
547 /*
548  * Find a block in the specified cylinder group.
549  *
550  * It is a panic if a request is made to find a block if none are
551  * available.
552  */
553 
554 static int32_t
555 ext2fs_mapsearch(struct m_ext2fs *fs, char *bbp, int32_t bpref)
556 {
557 	int32_t bno;
558 	int start, len, loc, i, map;
559 
560 	/*
561 	 * find the fragment by searching through the free block
562 	 * map for an appropriate bit pattern
563 	 */
564 	if (bpref)
565 		start = dtogd(fs, bpref) / NBBY;
566 	else
567 		start = 0;
568 	len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
569 	loc = skpc(0xff, len, &bbp[start]);
570 	if (loc == 0) {
571 		len = start + 1;
572 		start = 0;
573 		loc = skpc(0xff, len, &bbp[start]);
574 		if (loc == 0) {
575 			printf("start = %d, len = %d, fs = %s\n",
576 				start, len, fs->e2fs_fsmnt);
577 			panic("ext2fs_alloccg: map corrupted");
578 			/* NOTREACHED */
579 		}
580 	}
581 	i = start + len - loc;
582 	map = bbp[i];
583 	bno = i * NBBY;
584 	for (i = 1; i < (1 << NBBY); i <<= 1, bno++) {
585 		if ((map & i) == 0)
586 			return (bno);
587 	}
588 	printf("fs = %s\n", fs->e2fs_fsmnt);
589 	panic("ext2fs_mapsearch: block not in map");
590 	/* NOTREACHED */
591 }
592 
593 /*
594  * Fserr prints the name of a file system with an error diagnostic.
595  *
596  * The form of the error message is:
597  *	fs: error message
598  */
599 static void
600 ext2fs_fserr(struct m_ext2fs *fs, uid_t uid, char *cp)
601 {
602 
603 	log(LOG_ERR, "uid %u on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
604 }
605