xref: /csrg-svn/sys/ufs/lfs/lfs_alloc.c (revision 41309)
/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)lfs_alloc.c	7.17 (Berkeley) 05/02/90
 */

#include "param.h"
#include "systm.h"
#include "mount.h"
#include "buf.h"
#include "user.h"
#include "vnode.h"
#include "kernel.h"
#include "syslog.h"
#include "cmap.h"
#include "../ufs/quota.h"
#include "../ufs/inode.h"
#include "../ufs/fs.h"

extern u_long		hashalloc();
extern ino_t		ialloccg();
extern daddr_t		alloccg();
extern daddr_t		alloccgblk();
extern daddr_t		fragextend();
extern daddr_t		blkpref();
extern daddr_t		mapsearch();
extern int		inside[], around[];
extern unsigned char	*fragtbl[];

/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
alloc(ip, lbn, bpref, size, bnp)
	register struct inode *ip;
	daddr_t lbn, bpref;
	int size;
	daddr_t *bnp;
{
	daddr_t bno;
	register struct fs *fs;
	register struct buf *bp;
	int cg, error;
	struct ucred *cred = u.u_cred;		/* XXX */

	*bnp = 0;
	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("alloc: bad size");
	}
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
#ifdef QUOTA
	if (error = chkdq(ip, (long)btodb(size), cred, 0))
		return (error);
#endif
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = itog(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size,
		(u_long (*)())alloccg);
	if (bno > 0) {
		ip->i_blocks += btodb(size);
		ip->i_flag |= IUPD|ICHG;
		*bnp = bno;
		return (0);
	}
nospace:
	fserr(fs, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Reallocate a fragment to a bigger size.
 *
 * The number and size of the old block are given, and a preference
 * and new size are also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
realloccg(ip, lbprev, bpref, osize, nsize, bpp)
	register struct inode *ip;
	off_t lbprev;
	daddr_t bpref;
	int osize, nsize;
	struct buf **bpp;
{
	register struct fs *fs;
	struct buf *bp, *obp;
	int cg, request;
	daddr_t bprev, bno, bn;
	int i, error, count;
	struct ucred *cred = u.u_cred;		/* XXX */

	*bpp = 0;
	fs = ip->i_fs;
	if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf("dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
		panic("realloccg: bad size");
	}
	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
	if ((bprev = ip->i_db[lbprev]) == 0) {
		printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
		panic("realloccg: bad bprev");
	}
#ifdef QUOTA
	if (error = chkdq(ip, (long)btodb(nsize - osize), cred, 0))
		return (error);
#endif
	/*
	 * Allocate the extra space in the buffer.
	 */
	if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) {
		brelse(bp);
		return (error);
	}
	brealloc(bp, nsize);
	bp->b_flags |= B_DONE;
	bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	if (bno = fragextend(ip, cg, (long)bprev, osize, nsize)) {
		if (bp->b_blkno != fsbtodb(fs, bno))
			panic("bad blockno");
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		*bpp = bp;
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
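		/*
		 * Editor's note (not in the original): a worked example of
		 * the threshold below, with assumed values.  With
		 * fs_minfree = 10% and fs_dsize = 200000 fragments, the
		 * "half of the minimum free reserve" point is
		 * 200000 * 10 / (2 * 100) = 10000 fragments; once
		 * cs_nffree drops to 10000 or fewer free fragments we
		 * switch to FS_OPTTIME.
		 */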
		request = nsize;
		if (fs->fs_minfree < 5 ||
		    fs->fs_cstotal.cs_nffree >
		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying
		 * to grow a small fragment to a larger fragment. To save
		 * time, we allocate a full sized block, then free the
		 * unused portion. If the file continues to grow, the
		 * `fragextend' call above will be able to grow it in place
		 * without further copying. If aberrant programs cause
		 * disk fragmentation to grow within 2% of the free reserve,
		 * we choose to begin optimizing for space.
		 */
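		/*
		 * Editor's note (not in the original): with the same
		 * assumed values as above (fs_minfree = 10%, fs_dsize =
		 * 200000 fragments), the test below keeps FS_OPTTIME while
		 * cs_nffree < 200000 * (10 - 2) / 100 = 16000 fragments;
		 * at 16000 or more free fragments we fall back to
		 * FS_OPTSPACE.
		 */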
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = 0x%x, optim = %d, fs = %s\n",
		    ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
		panic("realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request,
		(u_long (*)())alloccg);
	if (bno > 0) {
		bp->b_blkno = bn = fsbtodb(fs, bno);
		count = howmany(osize, CLBYTES);
		for (i = 0; i < count; i++)
			munhash(ip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
		blkfree(ip, bprev, (off_t)osize);
		if (nsize < request)
			blkfree(ip, bno + numfrags(fs, nsize),
				(off_t)(request - nsize));
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		*bpp = bp;
		return (0);
	}
	brelse(bp);
nospace:
	/*
	 * no space available
	 */
	fserr(fs, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Allocate an inode in the file system.
 *
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate an inode:
 *   1) allocate the requested inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
ialloc(pip, ipref, mode, cred, ipp)
	register struct inode *pip;
	ino_t ipref;
	int mode;
	struct ucred *cred;
	struct inode **ipp;
{
	ino_t ino;
	register struct fs *fs;
	register struct inode *ip;
	int cg, error;

	*ipp = 0;
	fs = pip->i_fs;
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = itog(fs, ipref);
	ino = (ino_t)hashalloc(pip, cg, (long)ipref, mode, ialloccg);
	if (ino == 0)
		goto noinodes;
	error = iget(pip, ino, ipp);
	if (error) {
		ifree(pip, ino, mode);
		return (error);
	}
	ip = *ipp;
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %d, fs = %s\n",
		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
		panic("ialloc: dup alloc");
	}
	if (ip->i_blocks) {				/* XXX */
		printf("free inode %s/%d had %d blocks\n",
		    fs->fs_fsmnt, ino, ip->i_blocks);
		ip->i_blocks = 0;
	}
	ip->i_flags = 0;
	/*
	 * Set up a new generation number for this inode.
	 */
	if (++nextgennumber < (u_long)time.tv_sec)
		nextgennumber = time.tv_sec;
	ip->i_gen = nextgennumber;
	return (0);
noinodes:
	fserr(fs, "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Find a cylinder to place a directory.
 *
 * The policy implemented by this algorithm is to select, from
 * among those cylinder groups with an above-average number of
 * free inodes, the one with the smallest number of directories.
 */
ino_t
dirpref(fs)
	register struct fs *fs;
{
	int cg, minndir, mincg, avgifree;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	minndir = fs->fs_ipg;
	mincg = 0;
	for (cg = 0; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
			mincg = cg;
			minndir = fs->fs_cs(fs, cg).cs_ndir;
		}
	return ((ino_t)(fs->fs_ipg * mincg));
}

/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks.  An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks.  The end of one of these
 * contiguous blocks and the beginning of the next is physically separated
 * so that the disk head will be in transit between them for at least
 * fs_rotdelay milliseconds.  This is to allow time for the processor to
 * schedule another I/O transfer.
 */
daddr_t
blkpref(ip, lbn, indx, bap)
	struct inode *ip;
	daddr_t lbn;
	int indx;
	daddr_t *bap;
{
	register struct fs *fs;
	register int cg;
	int avgbfree, startcg;
	daddr_t nextblk;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR) {
			cg = itog(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		return (NULL);
	}
	/*
	 * One or more previous blocks have been laid out. If less
	 * than fs_maxcontig previous blocks are contiguous, the
	 * next block is requested contiguously, otherwise it is
	 * requested rotationally delayed by fs_rotdelay milliseconds.
	 */
	nextblk = bap[indx - 1] + fs->fs_frag;
	if (indx > fs->fs_maxcontig &&
	    bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig)
	    != nextblk)
		return (nextblk);
	if (fs->fs_rotdelay != 0)
		/*
		 * Here we convert ms of delay to frags as:
		 * (frags) = (ms) * (rev/sec) * (sect/rev) /
		 *	((sect/frag) * (ms/sec))
		 * then round up to the next block.
		 */
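		/*
		 * Editor's note (not in the original): a worked instance of
		 * that formula with assumed geometry.  For fs_rotdelay =
		 * 4 ms, fs_rps = 60 rev/sec, fs_nsect = 32 sect/rev, and
		 * NSPF(fs) = 2 sect/frag, the delay is
		 * 4 * 60 * 32 / (2 * 1000) = 3 frags (integer division),
		 * which roundup() then raises to one full block (8 frags
		 * when fs_frag = 8).
		 */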
		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
		    (NSPF(fs) * 1000), fs->fs_frag);
	return (nextblk);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 */
/*VARARGS5*/
u_long
hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	long pref;
	int size;	/* size for data blocks, mode for inodes */
	u_long (*allocator)();
{
	register struct fs *fs;
	long result;
	int i, icg = cg;

	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
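	/*
	 * Editor's note (not in the original): the offsets below double
	 * each pass (i = 1, 2, 4, 8, ...) and are added cumulatively, so
	 * starting from cylinder group 5 with fs_ncg = 16 (assumed
	 * values) the rehash probes groups 6, 8, 12, and 4 (12 + 8 wraps
	 * around), after which the brute force pass starting at
	 * icg + 2 = 7 covers the remaining groups.
	 */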
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (NULL);
}

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
daddr_t
fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	long bprev;
	int osize, nsize;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	long bno;
	int frags, bbase;
	int i, error;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (NULL);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (NULL);
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bprev);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(cg_blksfree(cgp), bno + i)) {
			brelse(bp);
			return (NULL);
		}
	/*
	 * the current fragment can be extended
	 * deduct the count on fragment being extended into
	 * increase the count on the remaining fragment (if any)
	 * allocate the extended piece
	 */
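	/*
	 * Editor's note (not in the original): a worked example with
	 * assumed sizes.  Take fs_frag = 8, a 2-frag piece (osize) at
	 * the start of its block, a free run of 4 frags right behind
	 * it, and nsize = 3 frags.  The scan below stops at i = 6 (the
	 * first non-free frag), so cg_frsum[6 - 2]-- retires the old
	 * 4-frag free run and, since i != frags, cg_frsum[6 - 3]++
	 * records the 3-frag run that remains once the extension has
	 * claimed one frag.
	 */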
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(cg_blksfree(cgp), bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(cg_blksfree(cgp), bno + i);
		cgp->cg_cs.cs_nffree--;
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
	return (bprev);
}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
daddr_t
alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	daddr_t bpref;
	int size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	register int i;
	int error, bno, frags, allocsiz;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (NULL);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (size == fs->fs_bsize) {
		bno = alloccgblk(fs, cgp, bpref);
		bdwrite(bp);
		return (bno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
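	/*
	 * Editor's note (not in the original): cg_frsum[n] counts the
	 * free runs of exactly n fragments in this cylinder group, so
	 * the loop below takes the smallest run length >= the request.
	 * For example (assumed values), a 2-frag request finding
	 * cg_frsum[2] == 0 and cg_frsum[5] == 1 is carved out of a
	 * 5-frag run, and the bookkeeping after mapsearch() below
	 * leaves a 3-frag run behind (cg_frsum[3]++).
	 */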
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0) {
			brelse(bp);
			return (NULL);
		}
		bno = alloccgblk(fs, cgp, bpref);
		bpref = dtogd(fs, bno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(cg_blksfree(cgp), bpref + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod++;
		cgp->cg_frsum[i]++;
		bdwrite(bp);
		return (bno);
	}
	bno = mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0) {
		brelse(bp);
		return (NULL);
	}
	for (i = 0; i < frags; i++)
		clrbit(cg_blksfree(cgp), bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod++;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	bdwrite(bp);
	return (cg * fs->fs_fpg + bno);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
daddr_t
alloccgblk(fs, cgp, bpref)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
{
	daddr_t bno;
	int cylno, pos, delta;
	short *cylbp;
	register int i;

	if (bpref == 0) {
		bpref = cgp->cg_rotor;
		goto norot;
	}
	bpref = blknum(fs, bpref);
	bpref = dtogd(fs, bpref);
	/*
	 * if the requested block is available, use it
	 */
	if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
		bno = bpref;
		goto gotit;
	}
	/*
	 * check for a block available on the same cylinder
	 */
	cylno = cbtocylno(fs, bpref);
	if (cg_blktot(cgp)[cylno] == 0)
		goto norot;
	if (fs->fs_cpc == 0) {
		/*
		 * block layout info is not available, so just have
		 * to take any block in this cylinder.
		 */
		bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
		goto norot;
	}
	/*
	 * check the summary information to see if a block is
	 * available in the requested cylinder starting at the
	 * requested rotational position and proceeding around.
	 */
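	/*
	 * Editor's note (not in the original): cg_blks() yields this
	 * cylinder's array of free block counts indexed by rotational
	 * position (fs_nrpos slots per cylinder).  The two loops below
	 * scan forward from the preferred position and then wrap, e.g.
	 * with fs_nrpos = 8 and pos = 5 (assumed values) the order
	 * checked is 5, 6, 7, 0, 1, 2, 3, 4.
	 */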
	cylbp = cg_blks(fs, cgp, cylno);
	pos = cbtorpos(fs, bpref);
	for (i = pos; i < fs->fs_nrpos; i++)
		if (cylbp[i] > 0)
			break;
	if (i == fs->fs_nrpos)
		for (i = 0; i < pos; i++)
			if (cylbp[i] > 0)
				break;
	if (cylbp[i] > 0) {
		/*
		 * found a rotational position, now find the actual
		 * block; it is a panic if none is actually there.
		 */
		pos = cylno % fs->fs_cpc;
		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
		if (fs_postbl(fs, pos)[i] == -1) {
			printf("pos = %d, i = %d, fs = %s\n",
			    pos, i, fs->fs_fsmnt);
			panic("alloccgblk: cyl groups corrupted");
		}
		for (i = fs_postbl(fs, pos)[i];; ) {
			if (isblock(fs, cg_blksfree(cgp), bno + i)) {
				bno = blkstofrags(fs, (bno + i));
				goto gotit;
			}
			delta = fs_rotbl(fs)[i];
			if (delta <= 0 ||
			    delta + i > fragstoblks(fs, fs->fs_fpg))
				break;
			i += delta;
		}
		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
		panic("alloccgblk: can't find blk in cyl");
	}
norot:
	/*
	 * no blocks in the requested cylinder, so take next
	 * available one in this cylinder group.
	 */
	bno = mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (NULL);
	cgp->cg_rotor = bno;
gotit:
	clrblock(fs, cg_blksfree(cgp), (long)fragstoblks(fs, bno));
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	cylno = cbtocylno(fs, bno);
	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
	cg_blktot(cgp)[cylno]--;
	fs->fs_fmod++;
	return (cgp->cg_cgx * fs->fs_fpg + bno);
}

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
 */
ino_t
ialloccg(ip, cg, ipref, mode)
	struct inode *ip;
	int cg;
	daddr_t ipref;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, start, len, loc, map, i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (NULL);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(cg_inosused(cgp), ipref))
			goto gotit;
	}
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
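	/*
	 * Editor's note (not in the original): skpc() skips leading
	 * bytes equal to 0xff (all eight inodes in use) and returns the
	 * number of bytes remaining from the first byte with a clear
	 * bit, or 0 if every byte matched; start + len - loc below is
	 * therefore the index of the first map byte holding a free
	 * inode.
	 */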
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %d, fs = %s\n",
			    cg, cgp->cg_irotor, fs->fs_fsmnt);
			panic("ialloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = cg_inosused(cgp)[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ialloccg: block not in map");
	/* NOTREACHED */
gotit:
	setbit(cg_inosused(cgp), ipref);
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	bdwrite(bp);
	return (cg * fs->fs_ipg + ipref);
}

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 */
blkfree(ip, bno, size)
	register struct inode *ip;
	daddr_t bno;
	off_t size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, cg, blk, frags, bbase;
	register int i;

	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("blkfree: bad size");
	}
	cg = dtog(fs, bno);
	if (badblock(fs, bno)) {
		printf("bad block %d, ino %d\n", bno, ip->i_number);
		return;
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno))) {
			printf("dev = 0x%x, block = %d, fs = %s\n",
			    ip->i_dev, bno, fs->fs_fsmnt);
			panic("blkfree: freeing free block");
		}
		setblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
		i = cbtocylno(fs, bno);
		cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
		cg_blktot(cgp)[i]++;
	} else {
		bbase = bno - fragnum(fs, bno);
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * deallocate the fragment
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(cg_blksfree(cgp), bno + i)) {
				printf("dev = 0x%x, block = %d, fs = %s\n",
				    ip->i_dev, bno + i, fs->fs_fsmnt);
				panic("blkfree: freeing free frag");
			}
			setbit(cg_blksfree(cgp), bno + i);
		}
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * if a complete block has been reassembled, account for it
		 */
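		/*
		 * Editor's note (not in the original): a worked example
		 * with assumed sizes.  With fs_frag = 8, freeing the
		 * last 2 in-use frags of a block whose other 6 frags
		 * were already free leaves all 8 bits set, so the branch
		 * below converts 8 fragment credits into one full block:
		 * cs_nffree -= 8 (the two just freed plus the six old
		 * ones) and cs_nbfree++.
		 */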
		if (isblock(fs, cg_blksfree(cgp),
		    (daddr_t)fragstoblks(fs, bbase))) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
			i = cbtocylno(fs, bbase);
			cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
			cg_blktot(cgp)[i]++;
		}
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Free an inode.
 *
 * The specified inode is placed back in the free map.
 */
ifree(ip, ino, mode)
	struct inode *ip;
	ino_t ino;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, cg;

	fs = ip->i_fs;
	if ((unsigned)ino >= fs->fs_ipg * fs->fs_ncg) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		panic("ifree: range");
	}
	cg = itog(fs, ino);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	ino %= fs->fs_ipg;
	if (isclr(cg_inosused(cgp), ino)) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		panic("ifree: freeing free inode");
	}
	clrbit(cg_inosused(cgp), ino);
	if (ino < cgp->cg_irotor)
		cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree++;
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir--;
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none are
 * available.
 */
daddr_t
mapsearch(fs, cgp, bpref, allocsiz)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
	int allocsiz;
{
	daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[start],
		(u_char *)fragtbl[fs->fs_frag],
		(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[0],
			(u_char *)fragtbl[fs->fs_frag],
			(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
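	/*
	 * Editor's note (not in the original): around[] and inside[]
	 * are mask/pattern pairs; after blk is shifted left one bit, a
	 * free run of exactly allocsiz frags bounded by in-use frags
	 * (or the block edge) satisfies (blk & field) == subfield, and
	 * shifting both left slides that window across the block.  For
	 * allocsiz = 2, say, the pair picks out the pattern 0110 in the
	 * shifted map (assumed illustration; see the companion tables
	 * source for the actual values).
	 */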
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, cg_blksfree(cgp), bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
	panic("alloccg: block not in map");
	return (-1);
}

/*
 * Check that a specified block number is in range.
 */
badblock(fs, bn)
	register struct fs *fs;
	daddr_t bn;
{

	if ((unsigned)bn >= fs->fs_size) {
		printf("bad block %d, ", bn);
		fserr(fs, "bad block");
		return (1);
	}
	return (0);
}

/*
 * Fserr prints the name of a file system with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
fserr(fs, cp)
	struct fs *fs;
	char *cp;
{

	log(LOG_ERR, "%s: %s\n", fs->fs_fsmnt, cp);
}