xref: /csrg-svn/sys/ufs/ffs/ffs_alloc.c (revision 39516)
1 /*
2  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that the above copyright notice and this paragraph are
7  * duplicated in all such forms and that any documentation,
8  * advertising materials, and other materials related to such
9  * distribution and use acknowledge that the software was developed
10  * by the University of California, Berkeley.  The name of the
11  * University may not be used to endorse or promote products derived
12  * from this software without specific prior written permission.
13  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16  *
17  *	@(#)ffs_alloc.c	7.12 (Berkeley) 11/12/89
18  */
19 
20 #include "param.h"
21 #include "systm.h"
22 #include "mount.h"
23 #include "buf.h"
24 #include "user.h"
25 #include "vnode.h"
26 #include "kernel.h"
27 #include "syslog.h"
28 #include "cmap.h"
29 #include "../ufs/quota.h"
30 #include "../ufs/inode.h"
31 #include "../ufs/fs.h"
32 
33 extern u_long		hashalloc();
34 extern ino_t		ialloccg();
35 extern daddr_t		alloccg();
36 extern daddr_t		alloccgblk();
37 extern daddr_t		fragextend();
38 extern daddr_t		blkpref();
39 extern daddr_t		mapsearch();
40 extern int		inside[], around[];
41 extern unsigned char	*fragtbl[];
42 
43 /*
44  * Allocate a block in the file system.
45  *
46  * The size of the requested block is given, which must be some
47  * multiple of fs_fsize and <= fs_bsize.
48  * A preference may be optionally specified. If a preference is given
49  * the following hierarchy is used to allocate a block:
50  *   1) allocate the requested block.
51  *   2) allocate a rotationally optimal block in the same cylinder.
52  *   3) allocate a block in the same cylinder group.
53  *   4) quadratically rehash into other cylinder groups, until an
54  *      available block is located.
55  * If no block preference is given the following hierarchy is used
56  * to allocate a block:
57  *   1) allocate a block in the cylinder group that contains the
58  *      inode for the file.
59  *   2) quadratically rehash into other cylinder groups, until an
60  *      available block is located.
61  */
alloc(ip, bpref, size, bpp, flags)
	register struct inode *ip;	/* inode of the file being extended */
	daddr_t bpref;			/* preferred block, or 0 for no preference */
	int size;			/* bytes: multiple of fs_fsize, <= fs_bsize */
	struct buf **bpp;		/* out: buffer holding the new block */
	int flags;			/* B_CLRBUF => zero the buffer before return */
{
	daddr_t bno;
	register struct fs *fs;
	register struct buf *bp;
	int cg, error;

	*bpp = 0;
	fs = ip->i_fs;
	/*
	 * The request must be a fragment multiple no larger than a block.
	 */
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("alloc: bad size");
	}
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	/*
	 * Only the super-user may allocate out of the minfree reserve.
	 */
	if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
#ifdef QUOTA
	if (error = chkdq(ip, (long)btodb(size), 0))
		return (error);
#endif
	if (bpref >= fs->fs_size)
		bpref = 0;
	/*
	 * Start the search in the preferred cylinder group if one was
	 * given, otherwise in the group that holds the file's inode.
	 */
	if (bpref == 0)
		cg = itog(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size,
		(u_long (*)())alloccg);
	if (bno <= 0)
		goto nospace;
	/* charge the inode for the new blocks and mark it dirty */
	ip->i_blocks += btodb(size);
	ip->i_flag |= IUPD|ICHG;
	bp = getblk(ip->i_devvp, fsbtodb(fs, bno), size);
	if (flags & B_CLRBUF)
		clrbuf(bp);
	*bpp = bp;
	return (0);
nospace:
	fserr(fs, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}
111 
112 /*
113  * Reallocate a fragment to a bigger size
114  *
115  * The number and size of the old block is given, and a preference
116  * and new size is also specified. The allocator attempts to extend
117  * the original block. Failing that, the regular block allocator is
118  * invoked to get an appropriate block.
119  */
realloccg(ip, bprev, bpref, osize, nsize, bpp)
	register struct inode *ip;	/* inode of the file being grown */
	daddr_t bprev, bpref;		/* existing fragment; preferred new location */
	int osize, nsize;		/* old and new sizes, in bytes */
	struct buf **bpp;		/* out: buffer holding the (re)allocated fragment */
{
	register struct fs *fs;
	struct buf *bp, *obp;
	int cg, request;
	daddr_t bno, bn;
	int i, error, count;

	*bpp = 0;
	fs = ip->i_fs;
	/*
	 * Both sizes must be fragment multiples no larger than a block.
	 */
	if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf("dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
		panic("realloccg: bad size");
	}
	if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
	if (bprev == 0) {
		printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
		panic("realloccg: bad bprev");
	}
#ifdef QUOTA
	if (error = chkdq(ip, (long)btodb(nsize - osize), 0))
		return (error);
#endif
	/*
	 * First try extending the existing fragment in place.
	 */
	cg = dtog(fs, bprev);
	bno = fragextend(ip, cg, (long)bprev, osize, nsize);
	if (bno != 0) {
		/* re-read and re-size the buffer until brealloc succeeds */
		do {
			error = bread(ip->i_devvp, fsbtodb(fs, bno),
				osize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
		} while (brealloc(bp, nsize) == 0);
		bp->b_flags |= B_DONE;
		/* zero only the newly added tail of the buffer */
		bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		*bpp = bp;
		return (0);
	}
	/*
	 * Could not extend in place: allocate a new fragment elsewhere.
	 * The request size depends on the current optimization policy.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
		request = nsize;
		if (fs->fs_minfree < 5 ||
		    fs->fs_cstotal.cs_nffree >
		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying
		 * to grow a small fragment to a larger fragment. To save
		 * time, we allocate a full sized block, then free the
		 * unused portion. If the file continues to grow, the
		 * `fragextend' call above will be able to grow it in place
		 * without further copying. If aberrant programs cause
		 * disk fragmentation to grow within 2% of the free reserve,
		 * we choose to begin optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = 0x%x, optim = %d, fs = %s\n",
		    ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
		panic("realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request,
		(u_long (*)())alloccg);
	if (bno > 0) {
		/*
		 * Got a new fragment: copy the old data over, purge any
		 * stale cache mappings for the new device blocks, and
		 * release the old fragment back to the free map.
		 */
		error = bread(ip->i_devvp, fsbtodb(fs, bprev),
			osize, NOCRED, &obp);
		if (error) {
			brelse(obp);
			return (error);
		}
		bn = fsbtodb(fs, bno);
		bp = getblk(ip->i_devvp, bn, nsize);
		bcopy(obp->b_un.b_addr, bp->b_un.b_addr, (u_int)osize);
		count = howmany(osize, CLBYTES);
		for (i = 0; i < count; i++)
			munhash(ip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
		bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
		if (obp->b_flags & B_DELWRI) {
			obp->b_flags &= ~B_DELWRI;
			u.u_ru.ru_oublock--;		/* delete charge */
		}
		brelse(obp);
		blkfree(ip, bprev, (off_t)osize);
		/* if a full block was requested, return the unused tail */
		if (nsize < request)
			blkfree(ip, bno + numfrags(fs, nsize),
				(off_t)(request - nsize));
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		*bpp = bp;
		return (0);
	}
nospace:
	/*
	 * no space available
	 */
	fserr(fs, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}
252 
253 /*
254  * Allocate an inode in the file system.
255  *
256  * A preference may be optionally specified. If a preference is given
257  * the following hierarchy is used to allocate an inode:
258  *   1) allocate the requested inode.
259  *   2) allocate an inode in the same cylinder group.
260  *   3) quadratically rehash into other cylinder groups, until an
261  *      available inode is located.
262  * If no inode preference is given the following hierarchy is used
263  * to allocate an inode:
264  *   1) allocate an inode in cylinder group 0.
265  *   2) quadratically rehash into other cylinder groups, until an
266  *      available inode is located.
267  */
ialloc(pip, ipref, mode, ipp)
	register struct inode *pip;	/* inode of the parent directory */
	ino_t ipref;			/* preferred inode number, or 0 */
	int mode;			/* mode of the new inode */
	struct inode **ipp;		/* out: locked in-core inode */
{
	ino_t ino;
	register struct fs *fs;
	register struct inode *ip;
	int cg, error;

	*ipp = 0;
	fs = pip->i_fs;
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;
#ifdef QUOTA
	if (error = chkiq(pip->i_dev, (struct inode *)NULL, u.u_uid, 0))
		return (error);
#endif
	/* an out-of-range preference means no preference */
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = itog(fs, ipref);
	ino = (ino_t)hashalloc(pip, cg, (long)ipref, mode, ialloccg);
	if (ino == 0)
		goto noinodes;
	error = iget(pip, ino, ipp);
	if (error) {
		/* could not get the in-core inode; give the number back */
		ifree(pip, ino, 0);
		return (error);
	}
	ip = *ipp;
	/* a freshly allocated inode must not look in-use on disk */
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %d, fs = %s\n",
		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
		panic("ialloc: dup alloc");
	}
	if (ip->i_blocks) {				/* XXX */
		printf("free inode %s/%d had %d blocks\n",
		    fs->fs_fsmnt, ino, ip->i_blocks);
		ip->i_blocks = 0;
	}
	ip->i_flags = 0;
	/*
	 * Set up a new generation number for this inode.
	 */
	if (++nextgennumber < (u_long)time.tv_sec)
		nextgennumber = time.tv_sec;
	ip->i_gen = nextgennumber;
	return (0);
noinodes:
	fserr(fs, "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	return (ENOSPC);
}
322 
323 /*
324  * Find a cylinder to place a directory.
325  *
326  * The policy implemented by this algorithm is to select from
327  * among those cylinder groups with above the average number of
328  * free inodes, the one with the smallest number of directories.
329  */
330 ino_t
331 dirpref(fs)
332 	register struct fs *fs;
333 {
334 	int cg, minndir, mincg, avgifree;
335 
336 	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
337 	minndir = fs->fs_ipg;
338 	mincg = 0;
339 	for (cg = 0; cg < fs->fs_ncg; cg++)
340 		if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
341 		    fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
342 			mincg = cg;
343 			minndir = fs->fs_cs(fs, cg).cs_ndir;
344 		}
345 	return ((ino_t)(fs->fs_ipg * mincg));
346 }
347 
348 /*
349  * Select the desired position for the next block in a file.  The file is
350  * logically divided into sections. The first section is composed of the
351  * direct blocks. Each additional section contains fs_maxbpg blocks.
352  *
353  * If no blocks have been allocated in the first section, the policy is to
354  * request a block in the same cylinder group as the inode that describes
355  * the file. If no blocks have been allocated in any other section, the
356  * policy is to place the section in a cylinder group with a greater than
357  * average number of free blocks.  An appropriate cylinder group is found
358  * by using a rotor that sweeps the cylinder groups. When a new group of
359  * blocks is needed, the sweep begins in the cylinder group following the
360  * cylinder group from which the previous allocation was made. The sweep
361  * continues until a cylinder group with greater than the average number
362  * of free blocks is found. If the allocation is for the first block in an
363  * indirect block, the information on the previous allocation is unavailable;
364  * here a best guess is made based upon the logical block number being
365  * allocated.
366  *
367  * If a section is already partially allocated, the policy is to
368  * contiguously allocate fs_maxcontig blocks.  The end of one of these
369  * contiguous blocks and the beginning of the next is physically separated
370  * so that the disk head will be in transit between them for at least
371  * fs_rotdelay milliseconds.  This is to allow time for the processor to
372  * schedule another I/O transfer.
373  */
daddr_t
blkpref(ip, lbn, indx, bap)
	struct inode *ip;	/* inode of the file being allocated to */
	daddr_t lbn;		/* logical block number being allocated */
	int indx;		/* index of lbn within the block map bap */
	daddr_t *bap;		/* array of already-allocated block numbers */
{
	register struct fs *fs;
	register int cg;
	int avgbfree, startcg;
	daddr_t nextblk;

	fs = ip->i_fs;
	/*
	 * Starting a new section (or the previous block was a hole):
	 * pick a cylinder group rather than a specific block.
	 */
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		/* direct blocks go in the inode's own cylinder group */
		if (lbn < NDADDR) {
			cg = itog(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		/* sweep forward from startcg, then wrap to the start */
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		/* no group qualifies: no preference */
		return (NULL);
	}
	/*
	 * One or more previous blocks have been laid out. If less
	 * than fs_maxcontig previous blocks are contiguous, the
	 * next block is requested contiguously, otherwise it is
	 * requested rotationally delayed by fs_rotdelay milliseconds.
	 */
	nextblk = bap[indx - 1] + fs->fs_frag;
	if (indx > fs->fs_maxcontig &&
	    bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig)
	    != nextblk)
		return (nextblk);
	if (fs->fs_rotdelay != 0)
		/*
		 * Here we convert ms of delay to frags as:
		 * (frags) = (ms) * (rev/sec) * (sect/rev) /
		 *	((sect/frag) * (ms/sec))
		 * then round up to the next block.
		 */
		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
		    (NSPF(fs) * 1000), fs->fs_frag);
	return (nextblk);
}
436 
437 /*
438  * Implement the cylinder overflow algorithm.
439  *
440  * The policy implemented by this algorithm is:
441  *   1) allocate the block in its requested cylinder group.
442  *   2) quadratically rehash on the cylinder group number.
443  *   3) brute force search for a free block.
444  */
445 /*VARARGS5*/
446 u_long
447 hashalloc(ip, cg, pref, size, allocator)
448 	struct inode *ip;
449 	int cg;
450 	long pref;
451 	int size;	/* size for data blocks, mode for inodes */
452 	u_long (*allocator)();
453 {
454 	register struct fs *fs;
455 	long result;
456 	int i, icg = cg;
457 
458 	fs = ip->i_fs;
459 	/*
460 	 * 1: preferred cylinder group
461 	 */
462 	result = (*allocator)(ip, cg, pref, size);
463 	if (result)
464 		return (result);
465 	/*
466 	 * 2: quadratic rehash
467 	 */
468 	for (i = 1; i < fs->fs_ncg; i *= 2) {
469 		cg += i;
470 		if (cg >= fs->fs_ncg)
471 			cg -= fs->fs_ncg;
472 		result = (*allocator)(ip, cg, 0, size);
473 		if (result)
474 			return (result);
475 	}
476 	/*
477 	 * 3: brute force search
478 	 * Note that we start at i == 2, since 0 was checked initially,
479 	 * and 1 is always checked in the quadratic rehash.
480 	 */
481 	cg = (icg + 2) % fs->fs_ncg;
482 	for (i = 2; i < fs->fs_ncg; i++) {
483 		result = (*allocator)(ip, cg, 0, size);
484 		if (result)
485 			return (result);
486 		cg++;
487 		if (cg == fs->fs_ncg)
488 			cg = 0;
489 	}
490 	return (NULL);
491 }
492 
493 /*
494  * Determine whether a fragment can be extended.
495  *
496  * Check to see if the necessary fragments are available, and
497  * if they are, allocate them.
498  */
daddr_t
fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;	/* inode owning the fragment */
	int cg;			/* cylinder group containing bprev */
	long bprev;		/* fragment being extended */
	int osize, nsize;	/* old and new sizes, in bytes */
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	long bno;
	int frags, bbase;
	int i, error;

	fs = ip->i_fs;
	/* not enough free fragments in this group to cover the growth */
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (NULL);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (NULL);
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bprev);
	/* every fragment in the extension range must be free */
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(cg_blksfree(cgp), bno + i)) {
			brelse(bp);
			return (NULL);
		}
	/*
	 * the current fragment can be extended
	 * deduct the count on fragment being extended into
	 * increase the count on the remaining fragment (if any)
	 * allocate the extended piece
	 */
	/* find the size of the free run following the old fragment */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(cg_blksfree(cgp), bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	/* mark the newly consumed fragments allocated in the maps */
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(cg_blksfree(cgp), bno + i);
		cgp->cg_cs.cs_nffree--;
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
	/* success: the fragment keeps its original address */
	return (bprev);
}
562 
563 /*
564  * Determine whether a block can be allocated.
565  *
566  * Check to see if a block of the appropriate size is available,
567  * and if it is, allocate it.
568  */
daddr_t
alloccg(ip, cg, bpref, size)
	struct inode *ip;	/* inode the allocation is charged to */
	int cg;			/* cylinder group to allocate in */
	daddr_t bpref;		/* preferred block, or 0 */
	int size;		/* bytes: fragment multiple, <= fs_bsize */
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	register int i;
	int error, bno, frags, allocsiz;

	fs = ip->i_fs;
	/* a full-block request needs a free block in this group */
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (NULL);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (size == fs->fs_bsize) {
		bno = alloccgblk(fs, cgp, bpref);
		bdwrite(bp);
		return (bno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0) {
			brelse(bp);
			return (NULL);
		}
		bno = alloccgblk(fs, cgp, bpref);
		bpref = dtogd(fs, bno);
		/* free the unused tail of the carved-up block */
		for (i = frags; i < fs->fs_frag; i++)
			setbit(cg_blksfree(cgp), bpref + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod++;
		cgp->cg_frsum[i]++;
		bdwrite(bp);
		return (bno);
	}
	/* locate a free run of at least allocsiz fragments */
	bno = mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0) {
		brelse(bp);
		return (NULL);
	}
	/* claim the fragments and update all three free counts */
	for (i = 0; i < frags; i++)
		clrbit(cg_blksfree(cgp), bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod++;
	/* the run shrinks from allocsiz to (allocsiz - frags) frags */
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	bdwrite(bp);
	return (cg * fs->fs_fpg + bno);
}
651 
652 /*
653  * Allocate a block in a cylinder group.
654  *
655  * This algorithm implements the following policy:
656  *   1) allocate the requested block.
657  *   2) allocate a rotationally optimal block in the same cylinder.
658  *   3) allocate the next available block on the block rotor for the
659  *      specified cylinder group.
660  * Note that this routine only allocates fs_bsize blocks; these
661  * blocks may be fragmented by the routine that allocates them.
662  */
daddr_t
alloccgblk(fs, cgp, bpref)
	register struct fs *fs;		/* filesystem to allocate in */
	register struct cg *cgp;	/* in-core cylinder group block */
	daddr_t bpref;			/* preferred block, or 0 */
{
	daddr_t bno;
	int cylno, pos, delta;
	short *cylbp;
	register int i;

	/* no preference: start from this group's block rotor */
	if (bpref == 0) {
		bpref = cgp->cg_rotor;
		goto norot;
	}
	/* round the preference down to a block boundary, then make it
	   relative to this cylinder group */
	bpref = blknum(fs, bpref);
	bpref = dtogd(fs, bpref);
	/*
	 * if the requested block is available, use it
	 */
	if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
		bno = bpref;
		goto gotit;
	}
	/*
	 * check for a block available on the same cylinder
	 */
	cylno = cbtocylno(fs, bpref);
	if (cg_blktot(cgp)[cylno] == 0)
		goto norot;
	if (fs->fs_cpc == 0) {
		/*
		 * block layout info is not available, so just have
		 * to take any block in this cylinder.
		 */
		bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
		goto norot;
	}
	/*
	 * check the summary information to see if a block is
	 * available in the requested cylinder starting at the
	 * requested rotational position and proceeding around.
	 */
	cylbp = cg_blks(fs, cgp, cylno);
	pos = cbtorpos(fs, bpref);
	for (i = pos; i < fs->fs_nrpos; i++)
		if (cylbp[i] > 0)
			break;
	if (i == fs->fs_nrpos)
		for (i = 0; i < pos; i++)
			if (cylbp[i] > 0)
				break;
	if (cylbp[i] > 0) {
		/*
		 * found a rotational position, now find the actual
		 * block. A panic if none is actually there.
		 */
		pos = cylno % fs->fs_cpc;
		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
		if (fs_postbl(fs, pos)[i] == -1) {
			printf("pos = %d, i = %d, fs = %s\n",
			    pos, i, fs->fs_fsmnt);
			panic("alloccgblk: cyl groups corrupted");
		}
		/* walk the rotational-block chain until a free block */
		for (i = fs_postbl(fs, pos)[i];; ) {
			if (isblock(fs, cg_blksfree(cgp), bno + i)) {
				bno = blkstofrags(fs, (bno + i));
				goto gotit;
			}
			delta = fs_rotbl(fs)[i];
			if (delta <= 0 ||
			    delta + i > fragstoblks(fs, fs->fs_fpg))
				break;
			i += delta;
		}
		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
		panic("alloccgblk: can't find blk in cyl");
	}
norot:
	/*
	 * no blocks in the requested cylinder, so take next
	 * available one in this cylinder group.
	 */
	bno = mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (NULL);
	cgp->cg_rotor = bno;
gotit:
	/* claim the block: clear the free bit and decrement all counts */
	clrblock(fs, cg_blksfree(cgp), (long)fragstoblks(fs, bno));
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	cylno = cbtocylno(fs, bno);
	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
	cg_blktot(cgp)[cylno]--;
	fs->fs_fmod++;
	/* convert the group-relative frag number to a filesystem one */
	return (cgp->cg_cgx * fs->fs_fpg + bno);
}
761 
762 /*
763  * Determine whether an inode can be allocated.
764  *
765  * Check to see if an inode is available, and if it is,
766  * allocate it using the following policy:
767  *   1) allocate the requested inode.
768  *   2) allocate the next available inode after the requested
769  *      inode in the specified cylinder group.
770  */
771 ino_t
772 ialloccg(ip, cg, ipref, mode)
773 	struct inode *ip;
774 	int cg;
775 	daddr_t ipref;
776 	int mode;
777 {
778 	register struct fs *fs;
779 	register struct cg *cgp;
780 	struct buf *bp;
781 	int error, start, len, loc, map, i;
782 
783 	fs = ip->i_fs;
784 	if (fs->fs_cs(fs, cg).cs_nifree == 0)
785 		return (NULL);
786 	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
787 		(int)fs->fs_cgsize, NOCRED, &bp);
788 	if (error) {
789 		brelse(bp);
790 		return (NULL);
791 	}
792 	cgp = bp->b_un.b_cg;
793 	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
794 		brelse(bp);
795 		return (NULL);
796 	}
797 	cgp->cg_time = time.tv_sec;
798 	if (ipref) {
799 		ipref %= fs->fs_ipg;
800 		if (isclr(cg_inosused(cgp), ipref))
801 			goto gotit;
802 	}
803 	start = cgp->cg_irotor / NBBY;
804 	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
805 	loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
806 	if (loc == 0) {
807 		len = start + 1;
808 		start = 0;
809 		loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
810 		if (loc == 0) {
811 			printf("cg = %s, irotor = %d, fs = %s\n",
812 			    cg, cgp->cg_irotor, fs->fs_fsmnt);
813 			panic("ialloccg: map corrupted");
814 			/* NOTREACHED */
815 		}
816 	}
817 	i = start + len - loc;
818 	map = cg_inosused(cgp)[i];
819 	ipref = i * NBBY;
820 	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
821 		if ((map & i) == 0) {
822 			cgp->cg_irotor = ipref;
823 			goto gotit;
824 		}
825 	}
826 	printf("fs = %s\n", fs->fs_fsmnt);
827 	panic("ialloccg: block not in map");
828 	/* NOTREACHED */
829 gotit:
830 	setbit(cg_inosused(cgp), ipref);
831 	cgp->cg_cs.cs_nifree--;
832 	fs->fs_cstotal.cs_nifree--;
833 	fs->fs_cs(fs, cg).cs_nifree--;
834 	fs->fs_fmod++;
835 	if ((mode & IFMT) == IFDIR) {
836 		cgp->cg_cs.cs_ndir++;
837 		fs->fs_cstotal.cs_ndir++;
838 		fs->fs_cs(fs, cg).cs_ndir++;
839 	}
840 	bdwrite(bp);
841 	return (cg * fs->fs_ipg + ipref);
842 }
843 
844 /*
845  * Free a block or fragment.
846  *
847  * The specified block or fragment is placed back in the
848  * free map. If a fragment is deallocated, a possible
849  * block reassembly is checked.
850  */
blkfree(ip, bno, size)
	register struct inode *ip;	/* inode the space was charged to */
	daddr_t bno;			/* first fragment being freed */
	off_t size;			/* bytes: fragment multiple, <= fs_bsize */
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, cg, blk, frags, bbase;
	register int i;

	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("blkfree: bad size");
	}
	cg = dtog(fs, bno);
	if (badblock(fs, bno)) {
		printf("bad block %d, ino %d\n", bno, ip->i_number);
		return;
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	/* work with the frag number relative to this cylinder group */
	bno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		/* freeing a whole block */
		if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno))) {
			printf("dev = 0x%x, block = %d, fs = %s\n",
			    ip->i_dev, bno, fs->fs_fsmnt);
			panic("blkfree: freeing free block");
		}
		setblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
		i = cbtocylno(fs, bno);
		cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
		cg_blktot(cgp)[i]++;
	} else {
		bbase = bno - fragnum(fs, bno);
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * deallocate the fragment
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(cg_blksfree(cgp), bno + i)) {
				printf("dev = 0x%x, block = %d, fs = %s\n",
				    ip->i_dev, bno + i, fs->fs_fsmnt);
				panic("blkfree: freeing free frag");
			}
			setbit(cg_blksfree(cgp), bno + i);
		}
		/* i == frags after the loop */
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * if a complete block has been reassembled, account for it
		 */
		if (isblock(fs, cg_blksfree(cgp),
		    (daddr_t)fragstoblks(fs, bbase))) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
			i = cbtocylno(fs, bbase);
			cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
			cg_blktot(cgp)[i]++;
		}
	}
	fs->fs_fmod++;
	bdwrite(bp);
}
945 
946 /*
947  * Free an inode.
948  *
949  * The specified inode is placed back in the free map.
950  */
951 ifree(ip, ino, mode)
952 	struct inode *ip;
953 	ino_t ino;
954 	int mode;
955 {
956 	register struct fs *fs;
957 	register struct cg *cgp;
958 	struct buf *bp;
959 	int error, cg;
960 
961 	fs = ip->i_fs;
962 	if ((unsigned)ino >= fs->fs_ipg*fs->fs_ncg) {
963 		printf("dev = 0x%x, ino = %d, fs = %s\n",
964 		    ip->i_dev, ino, fs->fs_fsmnt);
965 		panic("ifree: range");
966 	}
967 	cg = itog(fs, ino);
968 	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
969 		(int)fs->fs_cgsize, NOCRED, &bp);
970 	if (error) {
971 		brelse(bp);
972 		return;
973 	}
974 	cgp = bp->b_un.b_cg;
975 	if (!cg_chkmagic(cgp)) {
976 		brelse(bp);
977 		return;
978 	}
979 	cgp->cg_time = time.tv_sec;
980 	ino %= fs->fs_ipg;
981 	if (isclr(cg_inosused(cgp), ino)) {
982 		printf("dev = 0x%x, ino = %d, fs = %s\n",
983 		    ip->i_dev, ino, fs->fs_fsmnt);
984 		panic("ifree: freeing free inode");
985 	}
986 	clrbit(cg_inosused(cgp), ino);
987 	if (ino < cgp->cg_irotor)
988 		cgp->cg_irotor = ino;
989 	cgp->cg_cs.cs_nifree++;
990 	fs->fs_cstotal.cs_nifree++;
991 	fs->fs_cs(fs, cg).cs_nifree++;
992 	if ((mode & IFMT) == IFDIR) {
993 		cgp->cg_cs.cs_ndir--;
994 		fs->fs_cstotal.cs_ndir--;
995 		fs->fs_cs(fs, cg).cs_ndir--;
996 	}
997 	fs->fs_fmod++;
998 	bdwrite(bp);
999 }
1000 
1001 /*
1002  * Find a block of the specified size in the specified cylinder group.
1003  *
1004  * It is a panic if a request is made to find a block if none are
1005  * available.
1006  */
daddr_t
mapsearch(fs, cgp, bpref, allocsiz)
	register struct fs *fs;		/* filesystem being searched */
	register struct cg *cgp;	/* in-core cylinder group block */
	daddr_t bpref;			/* preferred starting point, or 0 */
	int allocsiz;			/* run length wanted, in fragments */
{
	daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	/* scan from start to the end of the map, then wrap */
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[start],
		(u_char *)fragtbl[fs->fs_frag],
		(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[0],
			(u_char *)fragtbl[fs->fs_frag],
			(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			/* the summary promised a run; the map disagrees */
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, cg_blksfree(cgp), bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		/* slide the mask across every position within the block */
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
	panic("alloccg: block not in map");
	return (-1);
}
1065 
1066 /*
1067  * Fserr prints the name of a file system with an error diagnostic.
1068  *
1069  * The form of the error message is:
1070  *	fs: error message
1071  */
fserr(fs, cp)
	struct fs *fs;	/* filesystem whose mount point prefixes the message */
	char *cp;	/* error string to log */
{

	/* callers in this file follow up with uprintf() to the user */
	log(LOG_ERR, "%s: %s\n", fs->fs_fsmnt, cp);
}
1079