/*
 * Copyright (c) 1982 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)lfs_alloc.c	6.18 (Berkeley) 11/11/85
 */

#include "param.h"
#include "systm.h"
#include "mount.h"
#include "fs.h"
#include "buf.h"
#include "inode.h"
#include "dir.h"
#include "user.h"
#include "quota.h"
#include "kernel.h"
#include "syslog.h"

extern u_long		hashalloc();
extern ino_t		ialloccg();
extern daddr_t		alloccg();
extern daddr_t		alloccgblk();
extern daddr_t		fragextend();
extern daddr_t		blkpref();
extern daddr_t		mapsearch();
extern int		inside[], around[];
extern unsigned char	*fragtbl[];

/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
struct buf *
alloc(ip, bpref, size)
	register struct inode *ip;
	daddr_t bpref;
	int size;
{
	daddr_t bno;
	register struct fs *fs;
	register struct buf *bp;
	int cg;

	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("alloc: bad size");
	}
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
#ifdef QUOTA
	u.u_error = chkdq(ip, (long)btodb(size), 0);
	if (u.u_error)
		return (NULL);
#endif
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = itog(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size,
		(u_long (*)())alloccg);
	if (bno <= 0)
		goto nospace;
	ip->i_blocks += btodb(size);
	ip->i_flag |= IUPD|ICHG;
	bp = getblk(ip->i_dev, fsbtodb(fs, bno), size);
	clrbuf(bp);
	return (bp);
nospace:
	fserr(fs, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	u.u_error = ENOSPC;
	return (NULL);
}

/*
 * Reallocate a fragment to a bigger size.
 *
 * The number and size of the old block are given, and a preference
 * and new size are also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
struct buf *
realloccg(ip, bprev, bpref, osize, nsize)
	register struct inode *ip;
	daddr_t bprev, bpref;
	int osize, nsize;
{
	register struct fs *fs;
	register struct buf *bp, *obp;
	int cg, request;
	daddr_t bno, bn;
	int i, count, s;
	extern struct cmap *mfind();

	fs = ip->i_fs;
	if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf("dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
		panic("realloccg: bad size");
	}
	if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
	if (bprev == 0) {
		printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
		panic("realloccg: bad bprev");
	}
#ifdef QUOTA
	u.u_error = chkdq(ip, (long)btodb(nsize - osize), 0);
	if (u.u_error)
		return (NULL);
#endif
	cg = dtog(fs, bprev);
	bno = fragextend(ip, cg, (long)bprev, osize, nsize);
	if (bno != 0) {
		do {
			bp = bread(ip->i_dev, fsbtodb(fs, bno), osize);
			if (bp->b_flags & B_ERROR) {
				brelse(bp);
				return (NULL);
			}
		} while (brealloc(bp, nsize) == 0);
		bp->b_flags |= B_DONE;
		bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		return (bp);
	}
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch (fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
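		/*
		 * For example, with a common fs_minfree of 10%, the
		 * check below switches to time optimization once the
		 * fragmented free space exceeds 5% of fs_dsize.
		 */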
		request = nsize;
		if (fs->fs_minfree < 5 ||
		    fs->fs_cstotal.cs_nffree >
		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying
		 * to grow a small fragment to a larger fragment. To save
		 * time, we allocate a full sized block, then free the
		 * unused portion. If the file continues to grow, the
		 * `fragextend' call above will be able to grow it in place
		 * without further copying. If aberrant programs cause
		 * disk fragmentation to grow within 2% of the free reserve,
		 * we choose to begin optimizing for space.
		 */
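		/*
		 * With fs_minfree at, say, 10%, the check below switches
		 * back to space optimization once fragmented free space
		 * reaches 8% of fs_dsize, i.e. within 2% of the reserve.
		 */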
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = 0x%x, optim = %d, fs = %s\n",
		    ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
		panic("realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request,
		(u_long (*)())alloccg);
	if (bno > 0) {
		obp = bread(ip->i_dev, fsbtodb(fs, bprev), osize);
		if (obp->b_flags & B_ERROR) {
			brelse(obp);
			return (NULL);
		}
		bn = fsbtodb(fs, bno);
		bp = getblk(ip->i_dev, bn, nsize);
		bcopy(obp->b_un.b_addr, bp->b_un.b_addr, (u_int)osize);
		count = howmany(osize, DEV_BSIZE);
		s = splimp();
		for (i = 0; i < count; i += CLBYTES / DEV_BSIZE)
			if (mfind(ip->i_dev, bn + i))
				munhash(ip->i_dev, bn + i);
		splx(s);
		bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
		if (obp->b_flags & B_DELWRI) {
			obp->b_flags &= ~B_DELWRI;
			u.u_ru.ru_oublock--;		/* delete charge */
		}
		brelse(obp);
		free(ip, bprev, (off_t)osize);
		if (nsize < request)
			free(ip, bno + numfrags(fs, nsize),
				(off_t)(request - nsize));
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		return (bp);
	}
nospace:
	/*
	 * no space available
	 */
	fserr(fs, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	u.u_error = ENOSPC;
	return (NULL);
}

/*
 * Allocate an inode in the file system.
 *
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate an inode:
 *   1) allocate the requested inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
struct inode *
ialloc(pip, ipref, mode)
	register struct inode *pip;
	ino_t ipref;
	int mode;
{
	ino_t ino;
	register struct fs *fs;
	register struct inode *ip;
	int cg;

	fs = pip->i_fs;
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;
#ifdef QUOTA
	u.u_error = chkiq(pip->i_dev, (struct inode *)NULL, u.u_uid, 0);
	if (u.u_error)
		return (NULL);
#endif
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = itog(fs, ipref);
	ino = (ino_t)hashalloc(pip, cg, (long)ipref, mode, ialloccg);
	if (ino == 0)
		goto noinodes;
	ip = iget(pip->i_dev, pip->i_fs, ino);
	if (ip == NULL) {
		ifree(pip, ino, 0);
		return (NULL);
	}
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %d, fs = %s\n",
		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
		panic("ialloc: dup alloc");
	}
	if (ip->i_blocks) {				/* XXX */
		printf("free inode %s/%d had %d blocks\n",
		    fs->fs_fsmnt, ino, ip->i_blocks);
		ip->i_blocks = 0;
	}
	return (ip);
noinodes:
	fserr(fs, "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	u.u_error = ENOSPC;
	return (NULL);
}

/*
 * Find a cylinder group to place a directory.
 *
 * The policy implemented by this algorithm is to select from
 * among those cylinder groups with above the average number of
 * free inodes, the one with the smallest number of directories.
 */
ino_t
dirpref(fs)
	register struct fs *fs;
{
	int cg, minndir, mincg, avgifree;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	minndir = fs->fs_ipg;
	mincg = 0;
	for (cg = 0; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
			mincg = cg;
			minndir = fs->fs_cs(fs, cg).cs_ndir;
		}
	return ((ino_t)(fs->fs_ipg * mincg));
}

/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks.  An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks.  The end of one of these
 * contiguous blocks and the beginning of the next is physically separated
 * so that the disk head will be in transit between them for at least
 * fs_rotdelay milliseconds.  This is to allow time for the processor to
 * schedule another I/O transfer.
 */
daddr_t
blkpref(ip, lbn, indx, bap)
	struct inode *ip;
	daddr_t lbn;
	int indx;
	daddr_t *bap;
{
	register struct fs *fs;
	register int cg;
	int avgbfree, startcg;
	daddr_t nextblk;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR) {
			cg = itog(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		return (NULL);
	}
	/*
	 * One or more previous blocks have been laid out. If fewer
	 * than fs_maxcontig previous blocks are contiguous, the
	 * next block is requested contiguously, otherwise it is
	 * requested rotationally delayed by fs_rotdelay milliseconds.
	 */
	nextblk = bap[indx - 1] + fs->fs_frag;
	if (indx > fs->fs_maxcontig &&
	    bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig)
	    != nextblk)
		return (nextblk);
	if (fs->fs_rotdelay != 0)
		/*
		 * Here we convert ms of delay to frags as:
		 * (frags) = (ms) * (rev/sec) * (sect/rev) /
		 *	((sect/frag) * (ms/sec))
		 * then round up to the next block.
		 */
		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
		    (NSPF(fs) * 1000), fs->fs_frag);
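	/*
	 * Worked example (illustrative values, not defaults): with
	 * fs_rotdelay = 4 ms, fs_rps = 60, fs_nsect = 32 and NSPF = 2,
	 * the skip is 4 * 60 * 32 / (2 * 1000) = 3 frags, which
	 * roundup() then raises to the next multiple of fs_frag,
	 * i.e. one full block.
	 */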
	return (nextblk);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 */
/*VARARGS5*/
u_long
hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	long pref;
	int size;	/* size for data blocks, mode for inodes */
	u_long (*allocator)();
{
	register struct fs *fs;
	long result;
	int i, icg = cg;

	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
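	/*
	 * The offset doubles each pass, so with icg = 0 and, say,
	 * fs_ncg = 32, the groups probed are 1, 3, 7, 15 and 31.
	 */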
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (NULL);
}

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
daddr_t
fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	long bprev;
	int osize, nsize;
{
	register struct fs *fs;
	register struct buf *bp;
	register struct cg *cgp;
	long bno;
	int frags, bbase;
	int i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (NULL);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (NULL);
	}
	bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
	cgp = bp->b_un.b_cg;
	if (bp->b_flags & B_ERROR || cgp->cg_magic != CG_MAGIC) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bprev);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(cgp->cg_free, bno + i)) {
			brelse(bp);
			return (NULL);
		}
	/*
	 * the current fragment can be extended
	 * deduct the count on fragment being extended into
	 * increase the count on the remaining fragment (if any)
	 * allocate the extended piece
	 */
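	/*
	 * Example: growing a 2-frag fragment to 4 frags when a free
	 * run of 3 frags follows it: the scan below stops at i = 5,
	 * so cg_frsum[3] is decremented and cg_frsum[1] incremented
	 * for the single leftover frag.
	 */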
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(cgp->cg_free, bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(cgp->cg_free, bno + i);
		cgp->cg_cs.cs_nffree--;
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
	return (bprev);
}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
daddr_t
alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	daddr_t bpref;
	int size;
{
	register struct fs *fs;
	register struct buf *bp;
	register struct cg *cgp;
	int bno, frags;
	int allocsiz;
	register int i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (NULL);
	bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
	cgp = bp->b_un.b_cg;
	if (bp->b_flags & B_ERROR || cgp->cg_magic != CG_MAGIC ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (size == fs->fs_bsize) {
		bno = alloccgblk(fs, cgp, bpref);
		bdwrite(bp);
		return (bno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
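	/*
	 * cg_frsum[n] counts the free runs of exactly n frags in this
	 * group, so scanning upward from the request size finds the
	 * smallest adequate run; e.g. a 3-frag request is carved from
	 * a 5-frag run if no 3- or 4-frag run exists.
	 */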
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0) {
			brelse(bp);
			return (NULL);
		}
		bno = alloccgblk(fs, cgp, bpref);
		bpref = dtogd(fs, bno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(cgp->cg_free, bpref + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod++;
		cgp->cg_frsum[i]++;
		bdwrite(bp);
		return (bno);
	}
	bno = mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0) {
		brelse(bp);
		return (NULL);
	}
	for (i = 0; i < frags; i++)
		clrbit(cgp->cg_free, bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod++;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	bdwrite(bp);
	return (cg * fs->fs_fpg + bno);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
daddr_t
alloccgblk(fs, cgp, bpref)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
{
	daddr_t bno;
	int cylno, pos, delta;
	short *cylbp;
	register int i;

	if (bpref == 0) {
		bpref = cgp->cg_rotor;
		goto norot;
	}
	bpref = blknum(fs, bpref);
	bpref = dtogd(fs, bpref);
	/*
	 * if the requested block is available, use it
	 */
	if (isblock(fs, cgp->cg_free, fragstoblks(fs, bpref))) {
		bno = bpref;
		goto gotit;
	}
	/*
	 * check for a block available on the same cylinder
	 */
	cylno = cbtocylno(fs, bpref);
	if (cgp->cg_btot[cylno] == 0)
		goto norot;
	if (fs->fs_cpc == 0) {
		/*
		 * block layout info is not available, so just have
		 * to take any block in this cylinder.
		 */
		bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
		goto norot;
	}
	/*
	 * check the summary information to see if a block is
	 * available in the requested cylinder starting at the
	 * requested rotational position and proceeding around.
	 */
	cylbp = cgp->cg_b[cylno];
	pos = cbtorpos(fs, bpref);
	for (i = pos; i < NRPOS; i++)
		if (cylbp[i] > 0)
			break;
	if (i == NRPOS)
		for (i = 0; i < pos; i++)
			if (cylbp[i] > 0)
				break;
	if (cylbp[i] > 0) {
		/*
		 * found a rotational position; now find the actual
		 * block.  It is a panic if none is actually there.
		 */
		pos = cylno % fs->fs_cpc;
		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
		if (fs->fs_postbl[pos][i] == -1) {
			printf("pos = %d, i = %d, fs = %s\n",
			    pos, i, fs->fs_fsmnt);
			panic("alloccgblk: cyl groups corrupted");
		}
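		/*
		 * fs_postbl[pos][i] gives the first block offset at this
		 * rotational position; each fs_rotbl entry then gives
		 * the skip to the next block at the same position, a
		 * zero or negative delta terminating the chain.
		 */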
		for (i = fs->fs_postbl[pos][i];; ) {
			if (isblock(fs, cgp->cg_free, bno + i)) {
				bno = blkstofrags(fs, (bno + i));
				goto gotit;
			}
			delta = fs->fs_rotbl[i];
			if (delta <= 0 || delta > MAXBPC - i)
				break;
			i += delta;
		}
		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
		panic("alloccgblk: can't find blk in cyl");
	}
norot:
	/*
	 * no blocks in the requested cylinder, so take next
	 * available one in this cylinder group.
	 */
	bno = mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (NULL);
	cgp->cg_rotor = bno;
gotit:
	clrblock(fs, cgp->cg_free, (long)fragstoblks(fs, bno));
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	cylno = cbtocylno(fs, bno);
	cgp->cg_b[cylno][cbtorpos(fs, bno)]--;
	cgp->cg_btot[cylno]--;
	fs->fs_fmod++;
	return (cgp->cg_cgx * fs->fs_fpg + bno);
}

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
 */
ino_t
ialloccg(ip, cg, ipref, mode)
	struct inode *ip;
	int cg;
	daddr_t ipref;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int start, len, loc, map, i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (NULL);
	bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
	cgp = bp->b_un.b_cg;
	if (bp->b_flags & B_ERROR || cgp->cg_magic != CG_MAGIC ||
	    cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(cgp->cg_iused, ipref))
			goto gotit;
	}
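	/*
	 * skpc() skips over bytes that are all ones (fully allocated)
	 * and returns the count of bytes remaining from the first byte
	 * with a clear bit, or 0 if none is found; start + len - loc
	 * below is thus the index of the first byte holding a free
	 * inode.
	 */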
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &cgp->cg_iused[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &cgp->cg_iused[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %d, fs = %s\n",
			    cg, cgp->cg_irotor, fs->fs_fsmnt);
			panic("ialloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = cgp->cg_iused[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ialloccg: block not in map");
	/* NOTREACHED */
gotit:
	setbit(cgp->cg_iused, ipref);
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	bdwrite(bp);
	return (cg * fs->fs_ipg + ipref);
}

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 */
free(ip, bno, size)
	register struct inode *ip;
	daddr_t bno;
	off_t size;
{
	register struct fs *fs;
	register struct cg *cgp;
	register struct buf *bp;
	int cg, blk, frags, bbase;
	register int i;

	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("free: bad size");
	}
	cg = dtog(fs, bno);
	if (badblock(fs, bno)) {
		printf("bad block %d, ino %d\n", bno, ip->i_number);
		return;
	}
	bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
	cgp = bp->b_un.b_cg;
	if (bp->b_flags & B_ERROR || cgp->cg_magic != CG_MAGIC) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		if (isblock(fs, cgp->cg_free, fragstoblks(fs, bno))) {
			printf("dev = 0x%x, block = %d, fs = %s\n",
			    ip->i_dev, bno, fs->fs_fsmnt);
			panic("free: freeing free block");
		}
		setblock(fs, cgp->cg_free, fragstoblks(fs, bno));
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
		i = cbtocylno(fs, bno);
		cgp->cg_b[i][cbtorpos(fs, bno)]++;
		cgp->cg_btot[i]++;
	} else {
		bbase = bno - fragnum(fs, bno);
		/*
		 * decrement the counts associated with the old frags
		 */
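		/*
		 * blkmap() extracts the fs_frag map bits covering this
		 * block; fragacct() walks the free runs in that pattern
		 * and adjusts cg_frsum by the given count, here -1 to
		 * retire the old run sizes before the frags are freed
		 * (and +1 below once the map is updated).
		 */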
		blk = blkmap(fs, cgp->cg_free, bbase);
		fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * deallocate the fragment
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(cgp->cg_free, bno + i)) {
				printf("dev = 0x%x, block = %d, fs = %s\n",
				    ip->i_dev, bno + i, fs->fs_fsmnt);
				panic("free: freeing free frag");
			}
			setbit(cgp->cg_free, bno + i);
		}
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, cgp->cg_free, bbase);
		fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * if a complete block has been reassembled, account for it
		 */
		if (isblock(fs, cgp->cg_free, fragstoblks(fs, bbase))) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
			i = cbtocylno(fs, bbase);
			cgp->cg_b[i][cbtorpos(fs, bbase)]++;
			cgp->cg_btot[i]++;
		}
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Free an inode.
 *
 * The specified inode is placed back in the free map.
 */
ifree(ip, ino, mode)
	struct inode *ip;
	ino_t ino;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	register struct buf *bp;
	int cg;

	fs = ip->i_fs;
	if ((unsigned)ino >= fs->fs_ipg * fs->fs_ncg) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		panic("ifree: range");
	}
	cg = itog(fs, ino);
	bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
	cgp = bp->b_un.b_cg;
	if (bp->b_flags & B_ERROR || cgp->cg_magic != CG_MAGIC) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	ino %= fs->fs_ipg;
	if (isclr(cgp->cg_iused, ino)) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		panic("ifree: freeing free inode");
	}
	clrbit(cgp->cg_iused, ino);
	if (ino < cgp->cg_irotor)
		cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree++;
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir--;
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none are
 * available.
 */
daddr_t
mapsearch(fs, cgp, bpref, allocsiz)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
	int allocsiz;
{
	daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
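	/*
	 * fragtbl[fs_frag] maps each possible map byte to a bit mask
	 * of the free-run sizes it contains; scanc() returns the count
	 * of bytes left when it finds a byte whose table entry matches
	 * the allocsiz mask, or 0 if no such byte is in the range.
	 */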
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((unsigned)len, (caddr_t)&cgp->cg_free[start],
		(caddr_t)fragtbl[fs->fs_frag],
		(int)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((unsigned)len, (caddr_t)&cgp->cg_free[0],
			(caddr_t)fragtbl[fs->fs_frag],
			(int)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
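	/*
	 * around[n] masks a window of n map bits plus the bounding bit
	 * on each side; inside[n] is that window with only the n middle
	 * bits set (free).  Sliding the window across the block locates
	 * a free run of exactly allocsiz frags.
	 */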
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, cgp->cg_free, bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
	panic("alloccg: block not in map");
	return (-1);
}

/*
 * Fserr prints the name of a file system with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
fserr(fs, cp)
	struct fs *fs;
	char *cp;
{

	log(LOG_ERR, "%s: %s\n", fs->fs_fsmnt, cp);
}
1034