xref: /csrg-svn/sys/ufs/lfs/lfs_alloc.c (revision 38776)
/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)lfs_alloc.c	7.11 (Berkeley) 08/26/89
 */

#include "param.h"
#include "systm.h"
#include "mount.h"
#include "buf.h"
#include "user.h"
#include "vnode.h"
#include "kernel.h"
#include "syslog.h"
#include "cmap.h"
#include "../ufs/quota.h"
#include "../ufs/inode.h"
#include "../ufs/fs.h"

extern u_long		hashalloc();
extern ino_t		ialloccg();
extern daddr_t		alloccg();
extern daddr_t		alloccgblk();
extern daddr_t		fragextend();
extern daddr_t		blkpref();
extern daddr_t		mapsearch();
extern int		inside[], around[];
extern unsigned char	*fragtbl[];

/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
alloc(ip, bpref, size, bpp, flags)
	register struct inode *ip;
	daddr_t bpref;
	int size;
	struct buf **bpp;
	int flags;
{
	daddr_t bno;
	register struct fs *fs;
	register struct buf *bp;
	int cg, error;

	*bpp = 0;
	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("alloc: bad size");
	}
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
#ifdef QUOTA
	if (error = chkdq(ip, (long)btodb(size), 0))
		return (error);
#endif
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = itog(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size,
		(u_long (*)())alloccg);
	if (bno <= 0)
		goto nospace;
	ip->i_blocks += btodb(size);
	ip->i_flag |= IUPD|ICHG;
	bp = getblk(ip->i_devvp, fsbtodb(fs, bno), size);
	if (flags & B_CLRBUF)
		clrbuf(bp);
	*bpp = bp;
	return (0);
nospace:
	fserr(fs, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Reallocate a fragment to a bigger size.
 *
 * The number and size of the old block are given, and a preference
 * and new size are also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
realloccg(ip, bprev, bpref, osize, nsize, bpp)
	register struct inode *ip;
	daddr_t bprev, bpref;
	int osize, nsize;
	struct buf **bpp;
{
	register struct fs *fs;
	struct buf *bp, *obp;
	int cg, request;
	daddr_t bno, bn;
	int i, error, count;

	*bpp = 0;
	fs = ip->i_fs;
	if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf("dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
		panic("realloccg: bad size");
	}
	if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
	if (bprev == 0) {
		printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
		panic("realloccg: bad bprev");
	}
#ifdef QUOTA
	if (error = chkdq(ip, (long)btodb(nsize - osize), 0))
		return (error);
#endif
	cg = dtog(fs, bprev);
	bno = fragextend(ip, cg, (long)bprev, osize, nsize);
	if (bno != 0) {
		do {
			error = bread(ip->i_devvp, fsbtodb(fs, bno),
				osize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
		} while (brealloc(bp, nsize) == 0);
		bp->b_flags |= B_DONE;
		bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		*bpp = bp;
		return (0);
	}
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
		request = nsize;
		if (fs->fs_minfree < 5 ||
		    fs->fs_cstotal.cs_nffree >
		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying
		 * to grow a small fragment to a larger fragment. To save
		 * time, we allocate a full sized block, then free the
		 * unused portion. If the file continues to grow, the
		 * `fragextend' call above will be able to grow it in place
		 * without further copying. If aberrant programs cause
		 * disk fragmentation to grow within 2% of the free reserve,
		 * we choose to begin optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = 0x%x, optim = %d, fs = %s\n",
		    ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
		panic("realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request,
		(u_long (*)())alloccg);
	if (bno > 0) {
		error = bread(ip->i_devvp, fsbtodb(fs, bprev),
			osize, NOCRED, &obp);
		if (error) {
			brelse(obp);
			return (error);
		}
		bn = fsbtodb(fs, bno);
		bp = getblk(ip->i_devvp, bn, nsize);
		bcopy(obp->b_un.b_addr, bp->b_un.b_addr, (u_int)osize);
		count = howmany(osize, CLBYTES);
		for (i = 0; i < count; i++)
			munhash(ip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
		bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
		if (obp->b_flags & B_DELWRI) {
			obp->b_flags &= ~B_DELWRI;
			u.u_ru.ru_oublock--;		/* delete charge */
		}
		brelse(obp);
		blkfree(ip, bprev, (off_t)osize);
		if (nsize < request)
			blkfree(ip, bno + numfrags(fs, nsize),
				(off_t)(request - nsize));
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		*bpp = bp;
		return (0);
	}
nospace:
	/*
	 * no space available
	 */
	fserr(fs, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Allocate an inode in the file system.
 *
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate an inode:
 *   1) allocate the requested inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
ialloc(pip, ipref, mode, ipp)
	register struct inode *pip;
	ino_t ipref;
	int mode;
	struct inode **ipp;
{
	ino_t ino;
	register struct fs *fs;
	register struct inode *ip;
	int cg, error;

	*ipp = 0;
	fs = pip->i_fs;
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;
#ifdef QUOTA
	if (error = chkiq(pip->i_dev, (struct inode *)NULL, u.u_uid, 0))
		return (error);
#endif
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = itog(fs, ipref);
	ino = (ino_t)hashalloc(pip, cg, (long)ipref, mode, ialloccg);
	if (ino == 0)
		goto noinodes;
	error = iget(pip, ino, ipp);
	if (error) {
		ifree(pip, ino, 0);
		return (error);
	}
	ip = *ipp;
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %d, fs = %s\n",
		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
		panic("ialloc: dup alloc");
	}
	if (ip->i_blocks) {				/* XXX */
		printf("free inode %s/%d had %d blocks\n",
		    fs->fs_fsmnt, ino, ip->i_blocks);
		ip->i_blocks = 0;
	}
	/*
	 * Set up a new generation number for this inode.
	 */
	if (++nextgennumber < (u_long)time.tv_sec)
		nextgennumber = time.tv_sec;
	ip->i_gen = nextgennumber;
	return (0);
noinodes:
	fserr(fs, "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Find a cylinder group in which to place a directory.
 *
 * The policy implemented by this algorithm is to select from
 * among those cylinder groups with above the average number of
 * free inodes, the one with the smallest number of directories.
 */
ino_t
dirpref(fs)
	register struct fs *fs;
{
	int cg, minndir, mincg, avgifree;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	minndir = fs->fs_ipg;
	mincg = 0;
	for (cg = 0; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
			mincg = cg;
			minndir = fs->fs_cs(fs, cg).cs_ndir;
		}
	return ((ino_t)(fs->fs_ipg * mincg));
}

/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks.  An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks.  The end of one of these
 * contiguous blocks and the beginning of the next is physically separated
 * so that the disk head will be in transit between them for at least
 * fs_rotdelay milliseconds.  This is to allow time for the processor to
 * schedule another I/O transfer.
 */
daddr_t
blkpref(ip, lbn, indx, bap)
	struct inode *ip;
	daddr_t lbn;
	int indx;
	daddr_t *bap;
{
	register struct fs *fs;
	register int cg;
	int avgbfree, startcg;
	daddr_t nextblk;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR) {
			cg = itog(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		return (NULL);
	}
	/*
	 * One or more previous blocks have been laid out. If less
	 * than fs_maxcontig previous blocks are contiguous, the
	 * next block is requested contiguously, otherwise it is
	 * requested rotationally delayed by fs_rotdelay milliseconds.
	 */
	nextblk = bap[indx - 1] + fs->fs_frag;
	if (indx > fs->fs_maxcontig &&
	    bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig)
	    != nextblk)
		return (nextblk);
	if (fs->fs_rotdelay != 0)
		/*
		 * Here we convert ms of delay to frags as:
		 * (frags) = (ms) * (rev/sec) * (sect/rev) /
		 *	((sect/frag) * (ms/sec))
		 * then round up to the next block.
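		 *
		 * For example, with a (hypothetical) geometry of
		 * fs_rotdelay = 4ms, fs_rps = 60 rev/sec, fs_nsect = 32
		 * sect/rev, and NSPF(fs) = 2 sect/frag, the delay is
		 * 4*60*32 / (2*1000) = 3 frags, which roundup() then
		 * advances to the next multiple of fs_frag.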
		 */
		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
		    (NSPF(fs) * 1000), fs->fs_frag);
	return (nextblk);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 */
/*VARARGS5*/
u_long
hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	long pref;
	int size;	/* size for data blocks, mode for inodes */
	u_long (*allocator)();
{
	register struct fs *fs;
	long result;
	int i, icg = cg;

	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
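	 * (e.g. starting from group icg, the probes visit icg+1, icg+3,
	 * icg+7, ... (mod fs_ncg), the step doubling on each iteration)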
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (NULL);
}

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
daddr_t
fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	long bprev;
	int osize, nsize;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	long bno;
	int frags, bbase;
	int i, error;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (NULL);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (NULL);
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bprev);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(cg_blksfree(cgp), bno + i)) {
			brelse(bp);
			return (NULL);
		}
	/*
	 * the current fragment can be extended
	 * deduct the count on fragment being extended into
	 * increase the count on the remaining fragment (if any)
	 * allocate the extended piece
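	 *
	 * e.g. extending a 2-frag fragment to 3 frags when it is followed
	 * by a 4-frag free run: cg_frsum[4]-- (the run being extended into
	 * is consumed) and cg_frsum[3]++ (one frag is taken, leaving a
	 * 3-frag run)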
	 */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(cg_blksfree(cgp), bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(cg_blksfree(cgp), bno + i);
		cgp->cg_cs.cs_nffree--;
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
	return (bprev);
}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
daddr_t
alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	daddr_t bpref;
	int size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	register int i;
	int error, bno, frags, allocsiz;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (NULL);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (size == fs->fs_bsize) {
		bno = alloccgblk(fs, cgp, bpref);
		bdwrite(bp);
		return (bno);
	}
	/*
	 * check to see if any fragments are already available;
	 * allocsiz becomes the smallest available free-run size
	 * that is at least as large as the request
	 */
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0) {
			brelse(bp);
			return (NULL);
		}
		bno = alloccgblk(fs, cgp, bpref);
		bpref = dtogd(fs, bno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(cg_blksfree(cgp), bpref + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod++;
		cgp->cg_frsum[i]++;
		bdwrite(bp);
		return (bno);
	}
	bno = mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0) {
		brelse(bp);
		return (NULL);
	}
	for (i = 0; i < frags; i++)
		clrbit(cg_blksfree(cgp), bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod++;
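	/*
	 * one free run of length allocsiz has been consumed; if it was
	 * longer than the request, its unused tail becomes a run of
	 * length allocsiz - frags
	 */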
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	bdwrite(bp);
	return (cg * fs->fs_fpg + bno);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
daddr_t
alloccgblk(fs, cgp, bpref)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
{
	daddr_t bno;
	int cylno, pos, delta;
	short *cylbp;
	register int i;

	if (bpref == 0) {
		bpref = cgp->cg_rotor;
		goto norot;
	}
	bpref = blknum(fs, bpref);
	bpref = dtogd(fs, bpref);
	/*
	 * if the requested block is available, use it
	 */
	if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
		bno = bpref;
		goto gotit;
	}
	/*
	 * check for a block available on the same cylinder
	 */
	cylno = cbtocylno(fs, bpref);
	if (cg_blktot(cgp)[cylno] == 0)
		goto norot;
	if (fs->fs_cpc == 0) {
		/*
		 * block layout info is not available, so just have
		 * to take any block in this cylinder.
		 */
		bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
		goto norot;
	}
	/*
	 * check the summary information to see if a block is
	 * available in the requested cylinder starting at the
	 * requested rotational position and proceeding around.
	 */
	cylbp = cg_blks(fs, cgp, cylno);
	pos = cbtorpos(fs, bpref);
	for (i = pos; i < fs->fs_nrpos; i++)
		if (cylbp[i] > 0)
			break;
	if (i == fs->fs_nrpos)
		for (i = 0; i < pos; i++)
			if (cylbp[i] > 0)
				break;
	if (cylbp[i] > 0) {
		/*
		 * found a rotational position, now find the actual
		 * block.  It is a panic if none is actually there.
		 */
		pos = cylno % fs->fs_cpc;
		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
		if (fs_postbl(fs, pos)[i] == -1) {
			printf("pos = %d, i = %d, fs = %s\n",
			    pos, i, fs->fs_fsmnt);
			panic("alloccgblk: cyl groups corrupted");
		}
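		/*
		 * fs_postbl(fs, pos)[i] names the first block in this
		 * cylinder pattern at rotational position i; fs_rotbl(fs)
		 * then chains each block to the next one at the same
		 * rotational position.
		 */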
		for (i = fs_postbl(fs, pos)[i];; ) {
			if (isblock(fs, cg_blksfree(cgp), bno + i)) {
				bno = blkstofrags(fs, (bno + i));
				goto gotit;
			}
			delta = fs_rotbl(fs)[i];
			if (delta <= 0 ||
			    delta + i > fragstoblks(fs, fs->fs_fpg))
				break;
			i += delta;
		}
		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
		panic("alloccgblk: can't find blk in cyl");
	}
norot:
	/*
	 * no blocks in the requested cylinder, so take next
	 * available one in this cylinder group.
	 */
	bno = mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (NULL);
	cgp->cg_rotor = bno;
gotit:
	clrblock(fs, cg_blksfree(cgp), (long)fragstoblks(fs, bno));
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	cylno = cbtocylno(fs, bno);
	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
	cg_blktot(cgp)[cylno]--;
	fs->fs_fmod++;
	return (cgp->cg_cgx * fs->fs_fpg + bno);
}

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
 */
ino_t
ialloccg(ip, cg, ipref, mode)
	struct inode *ip;
	int cg;
	daddr_t ipref;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, start, len, loc, map, i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (NULL);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(cg_inosused(cgp), ipref))
			goto gotit;
	}
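	/*
	 * Scan the inode map a byte at a time starting at the irotor;
	 * skpc skips bytes equal to 0xff (fully allocated) and returns
	 * the count of bytes remaining when it finds one with a free
	 * inode, or 0 if every byte in the range was 0xff.
	 */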
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %d, fs = %s\n",
			    cg, cgp->cg_irotor, fs->fs_fsmnt);
			panic("ialloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = cg_inosused(cgp)[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ialloccg: block not in map");
	/* NOTREACHED */
gotit:
	setbit(cg_inosused(cgp), ipref);
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	bdwrite(bp);
	return (cg * fs->fs_ipg + ipref);
}

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 */
blkfree(ip, bno, size)
	register struct inode *ip;
	daddr_t bno;
	off_t size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, cg, blk, frags, bbase;
	register int i;

	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("blkfree: bad size");
	}
	cg = dtog(fs, bno);
	if (badblock(fs, bno)) {
		printf("bad block %d, ino %d\n", bno, ip->i_number);
		return;
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno))) {
			printf("dev = 0x%x, block = %d, fs = %s\n",
			    ip->i_dev, bno, fs->fs_fsmnt);
			panic("blkfree: freeing free block");
		}
		setblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
		i = cbtocylno(fs, bno);
		cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
		cg_blktot(cgp)[i]++;
	} else {
		bbase = bno - fragnum(fs, bno);
		/*
		 * decrement the counts associated with the old frags
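		 *
		 * (fragacct tallies the lengths of the free runs within
		 * this block into cg_frsum; the old tallies are removed
		 * here and re-added below, after the free bits change,
		 * so the fragment summary stays consistent)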
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * deallocate the fragment
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(cg_blksfree(cgp), bno + i)) {
				printf("dev = 0x%x, block = %d, fs = %s\n",
				    ip->i_dev, bno + i, fs->fs_fsmnt);
				panic("blkfree: freeing free frag");
			}
			setbit(cg_blksfree(cgp), bno + i);
		}
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * if a complete block has been reassembled, account for it
		 */
		if (isblock(fs, cg_blksfree(cgp),
		    (daddr_t)fragstoblks(fs, bbase))) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
			i = cbtocylno(fs, bbase);
			cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
			cg_blktot(cgp)[i]++;
		}
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Free an inode.
 *
 * The specified inode is placed back in the free map.
 */
ifree(ip, ino, mode)
	struct inode *ip;
	ino_t ino;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, cg;

	fs = ip->i_fs;
	if ((unsigned)ino >= fs->fs_ipg*fs->fs_ncg) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		panic("ifree: range");
	}
	cg = itog(fs, ino);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	ino %= fs->fs_ipg;
	if (isclr(cg_inosused(cgp), ino)) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		panic("ifree: freeing free inode");
	}
	clrbit(cg_inosused(cgp), ino);
	if (ino < cgp->cg_irotor)
		cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree++;
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir--;
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none
 * are available.
 */
daddr_t
mapsearch(fs, cgp, bpref, allocsiz)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
	int allocsiz;
{
	daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
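	 *
	 * (scanc examines the map a byte at a time; fragtbl[fs_frag]
	 * classifies each possible map byte by the sizes of the free
	 * runs it contains, and the mask argument selects bytes that
	 * hold a run of allocsiz fragments. The caller's cg_frsum
	 * check guarantees such a run exists.)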
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[start],
		(u_char *)fragtbl[fs->fs_frag],
		(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[0],
			(u_char *)fragtbl[fs->fs_frag],
			(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
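	 *
	 * (around[allocsiz] is a mask covering a candidate run plus its
	 * bounding bits, and inside[allocsiz] is the bit pattern such a
	 * free run presents; both are shifted left to slide the window
	 * across the block one fragment position at a time)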
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, cg_blksfree(cgp), bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
	panic("alloccg: block not in map");
	return (-1);
}

/*
 * Fserr prints the name of a file system with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
fserr(fs, cp)
	struct fs *fs;
	char *cp;
{

	log(LOG_ERR, "%s: %s\n", fs->fs_fsmnt, cp);
}
1078