xref: /netbsd-src/sys/ufs/ffs/ffs_balloc.c (revision 962766853c385b86328bab806c19ccdf4e22f287)
1 /*	$NetBSD: ffs_balloc.c,v 1.52 2009/02/22 20:28:06 ad Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Networks Associates Technology, Inc.
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project by Marshall
8  * Kirk McKusick and Network Associates Laboratories, the Security
9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11  * research program
12  *
13  * Copyright (c) 1982, 1986, 1989, 1993
14  *	The Regents of the University of California.  All rights reserved.
15  *
16  * Redistribution and use in source and binary forms, with or without
17  * modification, are permitted provided that the following conditions
18  * are met:
19  * 1. Redistributions of source code must retain the above copyright
20  *    notice, this list of conditions and the following disclaimer.
21  * 2. Redistributions in binary form must reproduce the above copyright
22  *    notice, this list of conditions and the following disclaimer in the
23  *    documentation and/or other materials provided with the distribution.
24  * 3. Neither the name of the University nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
41  */
42 
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.52 2009/02/22 20:28:06 ad Exp $");
45 
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/kauth.h>
57 #include <sys/fstrans.h>
58 
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 #include <ufs/ufs/inode.h>
62 #include <ufs/ufs/ufs_extern.h>
63 #include <ufs/ufs/ufs_bswap.h>
64 
65 #include <ufs/ffs/fs.h>
66 #include <ufs/ffs/ffs_extern.h>
67 
68 #include <uvm/uvm.h>
69 
70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
71     struct buf **);
72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
73     struct buf **);
74 
75 /*
76  * Balloc defines the structure of file system storage
77  * by allocating the physical blocks on a device given
78  * the inode and the logical block number in a file.
79  */
80 
81 int
82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
83     struct buf **bpp)
84 {
85 	int error;
86 
87 	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
88 		error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
89 	else
90 		error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
91 
92 	if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
93 		brelse(*bpp, 0);
94 
95 	return error;
96 }
97 
/*
 * UFS1 flavour of ffs_balloc(): ensure a physical block is allocated to
 * back the "size" bytes of file data at logical offset "off" in vnode
 * "vp", allocating direct, fragment, or indirect blocks as required.
 *
 * flags carries B_SYNC/B_CLRBUF/B_METAONLY allocation modifiers; cred is
 * charged for the allocation.  If bpp is non-NULL, on success *bpp holds
 * a buffer for the block (for B_METAONLY, the final indirect block).
 * Returns 0 on success or an errno; on failure any partially allocated
 * indirect chain is unwound and freed.
 */
static int
ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[NIADDR + 2];
	daddr_t newb, pref, nb;
	int32_t *bap;	/* XXX ondisk32 */
	int deallocated, osize, nsize, num, i, error;
	/* allociblk records every block allocated here, for unwind on error */
	int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
	int32_t *allocib;
	/* first indirect level we modified; -1 means nothing to unwind */
	int unwindidx = -1;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);
	/* size becomes the in-block extent: offset within block + request */
	size = blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
				    ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
					&ip->i_ffs1_db[0]),
				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			/* File now ends on a full-block boundary. */
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs1_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp && *bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */

	if (lbn < NDADDR) {
		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
					      B_MODIFY, bpp);
				if (error) {
					brelse(*bpp, 0);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
						      B_MODIFY, bpp);
					if (error) {
						brelse(*bpp, 0);
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
					&ip->i_ffs1_db[0]),
				    osize, nsize, cred, bpp, &newb);
				if (error)
					return (error);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
				&ip->i_ffs1_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
		}
		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		unwindidx = 0;
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		/* Loop exits holding "bp", the last-level indirect block. */
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
			    NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		/* Caller wants the indirect block itself, not the data. */
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
		    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	/* Data block already exists: drop the indirect and fetch the data. */
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    NOCRED, B_MODIFY, &nbp);
			if (error) {
				brelse(nbp, 0);
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps.  This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				/* nb = cylinder group block for this indirect */
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/*
		 * Undo the partial allocation.
		 */
		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, 0, &bp);
			if (r) {
				/*
				 * NOTE(review): panic() does not return, so
				 * the brelse() below is unreachable dead code.
				 */
				panic("Could not unwind indirect block, error %d", r);
				brelse(bp, 0);
			} else {
				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		/* Invalidate cached buffers for the now-freed indirects. */
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	/* Return every block allocated in this call to the free list. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
515 
/*
 * UFS2 flavour of ffs_balloc(): identical in structure to the UFS1 path
 * above, but operating on 64-bit on-disk block pointers (i_ffs2_db /
 * i_ffs2_ib, ufs_rw64).  Ensures a physical block backs "size" bytes at
 * logical offset "off", allocating fragments, direct blocks, or indirect
 * chains as needed; unwinds partial allocations on failure.
 *
 * Returns 0 or an errno; on success with bpp non-NULL, *bpp holds a
 * buffer for the block (for B_METAONLY, the final indirect block).
 */
static int
ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[NIADDR + 2];
	daddr_t newb, pref, nb;
	int64_t *bap;
	int deallocated, osize, nsize, num, i, error;
	/* allociblk records every block allocated here, for unwind on error */
	daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
	int64_t *allocib;
	/* first indirect level we modified; -1 means nothing to unwind */
	int unwindidx = -1;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);
	/* size becomes the in-block extent: offset within block + request */
	size = blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	if (lbn < 0)
		return (EFBIG);

#ifdef notyet
	/*
	 * Check for allocating external data.
	 * NOTE(review): disabled FreeBSD-derived extattr code; "dp" is not
	 * declared in this function, so this section would not compile if
	 * enabled as-is.
	 */
	if (flags & IO_EXT) {
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
					flags, &dp->di_extb[0]),
				    osize,
				    (int)fs->fs_bsize, cred, &bp);
				if (error)
					return (error);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize,
			    NOCRED, 0, &bp);
			if (error) {
				brelse(bp, 0);
				return (error);
			}
			mutex_enter(&bp->b_interlock);
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			mutex_exit(&bp->b_interlock);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, -1 - lbn, osize,
				    NOCRED, 0, &bp);
				if (error) {
					brelse(bp, 0);
					return (error);
				}
				mutex_enter(&bp->b_interlock);
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
				mutex_exit(&bp->b_interlock);
			} else {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
				        &dp->di_extb[0]),
				    osize, nsize, cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
			}
		} else {
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
			       &dp->di_extb[0]),
			   nsize, flags, cred, &newb);
			if (error)
				return (error);
			error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb),
			    nsize, (flags & BA_CLRBUF) != 0, &bp);
			if (error)
				return error;
			bp->b_xflags |= BX_ALTDATA;
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
#endif
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
				    ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
					&ip->i_ffs2_db[0]),
				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			/* File now ends on a full-block boundary. */
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs2_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_size);
			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			/*
			 * NOTE(review): the ufs1 variant tests
			 * "bpp && *bpp" here; this path assumes
			 * ffs_realloccg always set *bpp when bpp != NULL —
			 * confirm before relying on it.
			 */
			if (bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */

	if (lbn < NDADDR) {
		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
					      B_MODIFY, bpp);
				if (error) {
					brelse(*bpp, 0);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
						      B_MODIFY, bpp);
					if (error) {
						brelse(*bpp, 0);
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
					&ip->i_ffs2_db[0]),
				    osize, nsize, cred, bpp, &newb);
				if (error)
					return (error);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
				&ip->i_ffs2_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
		}
		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		unwindidx = 0;
		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
		*allocib = ufs_rw64(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		bap = (int64_t *)bp->b_data;
		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
		/* Loop exits holding "bp", the last-level indirect block. */
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
			    NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		/* Caller wants the indirect block itself, not the data. */
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
		    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	/* Data block already exists: drop the indirect and fetch the data. */
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    NOCRED, B_MODIFY, &nbp);
			if (error) {
				brelse(nbp, 0);
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps.  This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				/* nb = cylinder group block for this indirect */
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */

		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, 0, &bp);
			if (r) {
				/*
				 * NOTE(review): panic() does not return, so
				 * the brelse() below is unreachable dead code.
				 */
				panic("Could not unwind indirect block, error %d", r);
				brelse(bp, 0);
			} else {
				bap = (int64_t *)bp->b_data;
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		/* Invalidate cached buffers for the now-freed indirects. */
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	/* Return every block allocated in this call to the free list. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs2_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	return (error);
}
1044