xref: /netbsd-src/sys/ufs/ffs/ffs_balloc.c (revision 87ba0e2a319653af510fae24a6d2975d3589735b)
1 /*	$NetBSD: ffs_balloc.c,v 1.66 2022/11/17 06:40:40 chs Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Networks Associates Technology, Inc.
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project by Marshall
8  * Kirk McKusick and Network Associates Laboratories, the Security
9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11  * research program
12  *
13  * Copyright (c) 1982, 1986, 1989, 1993
14  *	The Regents of the University of California.  All rights reserved.
15  *
16  * Redistribution and use in source and binary forms, with or without
17  * modification, are permitted provided that the following conditions
18  * are met:
19  * 1. Redistributions of source code must retain the above copyright
20  *    notice, this list of conditions and the following disclaimer.
21  * 2. Redistributions in binary form must reproduce the above copyright
22  *    notice, this list of conditions and the following disclaimer in the
23  *    documentation and/or other materials provided with the distribution.
24  * 3. Neither the name of the University nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
41  */
42 
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.66 2022/11/17 06:40:40 chs Exp $");
45 
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #include "opt_uvmhist.h"
49 #endif
50 
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/buf.h>
54 #include <sys/file.h>
55 #include <sys/mount.h>
56 #include <sys/vnode.h>
57 #include <sys/kauth.h>
58 #include <sys/fstrans.h>
59 
60 #include <ufs/ufs/quota.h>
61 #include <ufs/ufs/ufsmount.h>
62 #include <ufs/ufs/inode.h>
63 #include <ufs/ufs/ufs_extern.h>
64 #include <ufs/ufs/ufs_bswap.h>
65 
66 #include <ufs/ffs/fs.h>
67 #include <ufs/ffs/ffs_extern.h>
68 
69 #ifdef UVMHIST
70 #include <uvm/uvm.h>
71 #endif
72 #include <uvm/uvm_extern.h>
73 #include <uvm/uvm_stat.h>
74 
75 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
76     struct buf **);
77 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
78     struct buf **);
79 
80 static daddr_t
ffs_extb(struct fs * fs,struct ufs2_dinode * dp,daddr_t nb)81 ffs_extb(struct fs *fs, struct ufs2_dinode *dp, daddr_t nb)
82 {
83 	return ufs_rw64(dp->di_extb[nb], UFS_FSNEEDSWAP(fs));
84 }
85 
86 /*
87  * Balloc defines the structure of file system storage
88  * by allocating the physical blocks on a device given
89  * the inode and the logical block number in a file.
90  */
91 
92 int
ffs_balloc(struct vnode * vp,off_t off,int size,kauth_cred_t cred,int flags,struct buf ** bpp)93 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
94     struct buf **bpp)
95 {
96 	int error;
97 
98 	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
99 		error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
100 	else
101 		error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
102 
103 	if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
104 		brelse(*bpp, 0);
105 
106 	return error;
107 }
108 
/*
 * UFS1 variant of ffs_balloc(): ensure that the block underlying byte
 * offset "off" of vnode "vp" is allocated so that "size" bytes can be
 * written there, charging any new blocks to "cred".  "flags" carries
 * allocation modifiers (B_SYNC, B_CLRBUF, B_METAONLY, ...).  If "bpp"
 * is non-NULL, on success *bpp holds a buffer for the data block (or,
 * with B_METAONLY, for the last indirect block on the path).
 *
 * Returns 0 or an errno; on failure every block allocated by this call
 * is released again by the "fail:" unwind code at the bottom.
 */
static int
ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[UFS_NIADDR + 2];
	daddr_t newb, pref, nb;
	int32_t *bap;	/* XXX ondisk32 */
	int deallocated, osize, nsize, num, i, error;
	/* allociblk[] records every block allocated here, for unwinding */
	int32_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
	int32_t *allocib;
	int unwindidx = -1;	/* first indirect level we dirtied, or -1 */
	const int needswap = UFS_FSNEEDSWAP(fs);
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = ffs_lblkno(fs, off);
	/* bytes that must be valid in the final block of the write */
	size = ffs_blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp,
	    lbn, size, 0);

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = ffs_lblkno(fs, ip->i_size);
	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = ffs_blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/* ffs_realloccg() releases um_lock for us */
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb, ffs_getdb(fs, ip, nb),
				    ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
					&ip->i_ffs1_db[0]),
				    osize, (int)fs->fs_bsize, flags, cred, bpp,
				    &newb);
			if (error)
				return (error);
			ip->i_size = ffs_lblktosize(fs, nb + 1);
			ip->i_ffs1_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp && *bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first UFS_NDADDR blocks are direct blocks
	 */

	if (lbn < UFS_NDADDR) {
		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize,
					      B_MODIFY, bpp);
				if (error) {
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
			nsize = ffs_fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize,
						      B_MODIFY, bpp);
					if (error) {
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_getdb(fs, ip, lbn),
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
					&ip->i_ffs1_db[0]),
				    osize, nsize, flags, cred, bpp, &newb);
				if (error)
					return (error);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
				nsize = ffs_fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
				&ip->i_ffs1_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
		}
		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		unwindidx = 0;
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 * On loop exit, "bp" holds the last indirect block and "nb" the
	 * data block number (0 if not yet allocated).
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
		if (error) {
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		/* Try to keep snapshot indirect blocks contiguous. */
		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
			    flags | B_METAONLY, &bap[0]);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
			    NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		/* remember the shallowest level we modified, for unwinding */
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
		    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    B_MODIFY, &nbp);
			if (error) {
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps.  This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				/* flush the cylinder group of this block too */
				nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
				    FFS_DBTOFSB(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/*
		 * Undo the partial allocation.
		 */
		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, 0, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
			} else {
				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		/* toss any deeper indirect buffers left in the cache */
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	/* free every block we allocated in this call */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#if defined(QUOTA) || defined(QUOTA2)
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
526 
527 static int
ffs_balloc_ufs2(struct vnode * vp,off_t off,int size,kauth_cred_t cred,int flags,struct buf ** bpp)528 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
529     int flags, struct buf **bpp)
530 {
531 	daddr_t lbn, lastlbn;
532 	struct buf *bp, *nbp;
533 	struct inode *ip = VTOI(vp);
534 	struct fs *fs = ip->i_fs;
535 	struct ufsmount *ump = ip->i_ump;
536 	struct indir indirs[UFS_NIADDR + 2];
537 	daddr_t newb, pref, nb;
538 	int64_t *bap;
539 	int deallocated, osize, nsize, num, i, error;
540 	daddr_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
541 	int64_t *allocib;
542 	int unwindidx = -1;
543 	const int needswap = UFS_FSNEEDSWAP(fs);
544 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
545 
546 	KASSERT((ump->um_flags & UFS_EA) != 0 || (flags & IO_EXT) == 0);
547 
548 	lbn = ffs_lblkno(fs, off);
549 	size = ffs_blkoff(fs, off) + size;
550 	if (size > fs->fs_bsize)
551 		panic("ffs_balloc: blk too big");
552 	if (bpp != NULL) {
553 		*bpp = NULL;
554 	}
555 	UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp,
556 	    lbn, size, 0);
557 
558 	if (lbn < 0)
559 		return (EFBIG);
560 
561 	/*
562 	 * Check for allocating external data.
563 	 */
564 	if (flags & IO_EXT) {
565 		struct ufs2_dinode *dp = ip->i_din.ffs2_din;
566 		if (lbn >= UFS_NXADDR)
567 			return (EFBIG);
568 		/*
569 		 * If the next write will extend the data into a new block,
570 		 * and the data is currently composed of a fragment
571 		 * this fragment has to be extended to be a full block.
572 		 */
573 		lastlbn = ffs_lblkno(fs, dp->di_extsize);
574 		if (lastlbn < lbn) {
575 			nb = lastlbn;
576 			osize = ffs_sblksize(fs, dp->di_extsize, nb);
577 			if (osize < fs->fs_bsize && osize > 0) {
578 				mutex_enter(&ump->um_lock);
579 				error = ffs_realloccg(ip, -1 - nb,
580 				    ffs_extb(fs, dp, nb),
581 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
582 					flags, &dp->di_extb[0]),
583 				    osize, (int)fs->fs_bsize, flags, cred,
584 				    &bp, &newb);
585 				if (error)
586 					return (error);
587 				dp->di_extsize = ffs_lblktosize(fs, nb + 1);
588 				dp->di_extb[nb] = FFS_DBTOFSB(fs, bp->b_blkno);
589 				ip->i_flag |= IN_CHANGE | IN_UPDATE;
590 				if (flags & IO_SYNC)
591 					bwrite(bp);
592 				else
593 					bawrite(bp);
594 			}
595 		}
596 		/*
597 		 * All blocks are direct blocks
598 		 */
599 		nb = dp->di_extb[lbn];
600 		if (nb != 0 && dp->di_extsize >= ffs_lblktosize(fs, lbn + 1)) {
601 			error = bread(vp, -1 - lbn, fs->fs_bsize,
602 			    0, &bp);
603 			if (error) {
604 				return (error);
605 			}
606 			mutex_enter(bp->b_objlock);
607 			bp->b_blkno = FFS_FSBTODB(fs, nb);
608 			mutex_exit(bp->b_objlock);
609 			*bpp = bp;
610 			return (0);
611 		}
612 		if (nb != 0) {
613 			/*
614 			 * Consider need to reallocate a fragment.
615 			 */
616 			osize = ffs_fragroundup(fs, ffs_blkoff(fs, dp->di_extsize));
617 			nsize = ffs_fragroundup(fs, size);
618 			if (nsize <= osize) {
619 				error = bread(vp, -1 - lbn, osize,
620 				    0, &bp);
621 				if (error) {
622 					return (error);
623 				}
624 				mutex_enter(bp->b_objlock);
625 				bp->b_blkno = FFS_FSBTODB(fs, nb);
626 				mutex_exit(bp->b_objlock);
627 			} else {
628 				mutex_enter(&ump->um_lock);
629 				error = ffs_realloccg(ip, -1 - lbn,
630 				    ffs_extb(fs, dp, lbn),
631 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
632 				        &dp->di_extb[0]),
633 				    osize, nsize, flags, cred, &bp, &newb);
634 				if (error)
635 					return (error);
636 			}
637 		} else {
638 			if (dp->di_extsize < ffs_lblktosize(fs, lbn + 1))
639 				nsize = ffs_fragroundup(fs, size);
640 			else
641 				nsize = fs->fs_bsize;
642 			mutex_enter(&ump->um_lock);
643 			error = ffs_alloc(ip, lbn,
644 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
645 			       &dp->di_extb[0]),
646 			   nsize, flags, cred, &newb);
647 			if (error)
648 				return (error);
649 			error = ffs_getblk(vp, -1 - lbn, FFS_FSBTODB(fs, newb),
650 			    nsize, (flags & B_CLRBUF) != 0, &bp);
651 			if (error)
652 				return error;
653 		}
654 		dp->di_extb[lbn] = FFS_DBTOFSB(fs, bp->b_blkno);
655 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
656 		*bpp = bp;
657 		return (0);
658 	}
659 	/*
660 	 * If the next write will extend the file into a new block,
661 	 * and the file is currently composed of a fragment
662 	 * this fragment has to be extended to be a full block.
663 	 */
664 
665 	lastlbn = ffs_lblkno(fs, ip->i_size);
666 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
667 		nb = lastlbn;
668 		osize = ffs_blksize(fs, ip, nb);
669 		if (osize < fs->fs_bsize && osize > 0) {
670 			mutex_enter(&ump->um_lock);
671 			error = ffs_realloccg(ip, nb, ffs_getdb(fs, ip, lbn),
672 				    ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
673 					&ip->i_ffs2_db[0]),
674 				    osize, (int)fs->fs_bsize, flags, cred, bpp,
675 				    &newb);
676 			if (error)
677 				return (error);
678 			ip->i_size = ffs_lblktosize(fs, nb + 1);
679 			ip->i_ffs2_size = ip->i_size;
680 			uvm_vnp_setsize(vp, ip->i_size);
681 			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
682 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
683 			if (bpp) {
684 				if (flags & B_SYNC)
685 					bwrite(*bpp);
686 				else
687 					bawrite(*bpp);
688 			}
689 		}
690 	}
691 
692 	/*
693 	 * The first UFS_NDADDR blocks are direct blocks
694 	 */
695 
696 	if (lbn < UFS_NDADDR) {
697 		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
698 		if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {
699 
700 			/*
701 			 * The block is an already-allocated direct block
702 			 * and the file already extends past this block,
703 			 * thus this must be a whole block.
704 			 * Just read the block (if requested).
705 			 */
706 
707 			if (bpp != NULL) {
708 				error = bread(vp, lbn, fs->fs_bsize,
709 					      B_MODIFY, bpp);
710 				if (error) {
711 					return (error);
712 				}
713 			}
714 			return (0);
715 		}
716 		if (nb != 0) {
717 
718 			/*
719 			 * Consider need to reallocate a fragment.
720 			 */
721 
722 			osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
723 			nsize = ffs_fragroundup(fs, size);
724 			if (nsize <= osize) {
725 
726 				/*
727 				 * The existing block is already
728 				 * at least as big as we want.
729 				 * Just read the block (if requested).
730 				 */
731 
732 				if (bpp != NULL) {
733 					error = bread(vp, lbn, osize,
734 						      B_MODIFY, bpp);
735 					if (error) {
736 						return (error);
737 					}
738 				}
739 				return 0;
740 			} else {
741 
742 				/*
743 				 * The existing block is smaller than we want,
744 				 * grow it.
745 				 */
746 				mutex_enter(&ump->um_lock);
747 				error = ffs_realloccg(ip, lbn,
748 				    ffs_getdb(fs, ip, lbn),
749 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
750 					&ip->i_ffs2_db[0]),
751 				    osize, nsize, flags, cred, bpp, &newb);
752 				if (error)
753 					return (error);
754 			}
755 		} else {
756 
757 			/*
758 			 * the block was not previously allocated,
759 			 * allocate a new block or fragment.
760 			 */
761 
762 			if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
763 				nsize = ffs_fragroundup(fs, size);
764 			else
765 				nsize = fs->fs_bsize;
766 			mutex_enter(&ump->um_lock);
767 			error = ffs_alloc(ip, lbn,
768 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
769 				&ip->i_ffs2_db[0]),
770 			    nsize, flags, cred, &newb);
771 			if (error)
772 				return (error);
773 			if (bpp != NULL) {
774 				error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
775 				    nsize, (flags & B_CLRBUF) != 0, bpp);
776 				if (error)
777 					return error;
778 			}
779 		}
780 		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
781 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
782 		return (0);
783 	}
784 
785 	/*
786 	 * Determine the number of levels of indirection.
787 	 */
788 
789 	pref = 0;
790 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
791 		return (error);
792 
793 	/*
794 	 * Fetch the first indirect block allocating if necessary.
795 	 */
796 
797 	--num;
798 	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
799 	allocib = NULL;
800 	allocblk = allociblk;
801 	if (nb == 0) {
802 		mutex_enter(&ump->um_lock);
803 		pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
804 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
805 		    flags | B_METAONLY, cred, &newb);
806 		if (error)
807 			goto fail;
808 		nb = newb;
809 		*allocblk++ = nb;
810 		error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
811 		    fs->fs_bsize, true, &bp);
812 		if (error)
813 			goto fail;
814 		/*
815 		 * Write synchronously so that indirect blocks
816 		 * never point at garbage.
817 		 */
818 		if ((error = bwrite(bp)) != 0)
819 			goto fail;
820 		unwindidx = 0;
821 		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
822 		*allocib = ufs_rw64(nb, needswap);
823 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
824 	}
825 
826 	/*
827 	 * Fetch through the indirect blocks, allocating as necessary.
828 	 */
829 
830 	for (i = 1;;) {
831 		error = bread(vp,
832 		    indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
833 		if (error) {
834 			goto fail;
835 		}
836 		bap = (int64_t *)bp->b_data;
837 		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
838 		if (i == num)
839 			break;
840 		i++;
841 		if (nb != 0) {
842 			brelse(bp, 0);
843 			continue;
844 		}
845 		if (fscow_run(bp, true) != 0) {
846 			brelse(bp, 0);
847 			goto fail;
848 		}
849 		mutex_enter(&ump->um_lock);
850 		/* Try to keep snapshot indirect blocks contiguous. */
851 		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
852 			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
853 			    flags | B_METAONLY, &bap[0]);
854 		if (pref == 0)
855 			pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
856 			    NULL);
857 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
858 		    flags | B_METAONLY, cred, &newb);
859 		if (error) {
860 			brelse(bp, 0);
861 			goto fail;
862 		}
863 		nb = newb;
864 		*allocblk++ = nb;
865 		error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
866 		    fs->fs_bsize, true, &nbp);
867 		if (error) {
868 			brelse(bp, 0);
869 			goto fail;
870 		}
871 		/*
872 		 * Write synchronously so that indirect blocks
873 		 * never point at garbage.
874 		 */
875 		if ((error = bwrite(nbp)) != 0) {
876 			brelse(bp, 0);
877 			goto fail;
878 		}
879 		if (unwindidx < 0)
880 			unwindidx = i - 1;
881 		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
882 
883 		/*
884 		 * If required, write synchronously, otherwise use
885 		 * delayed write.
886 		 */
887 
888 		if (flags & B_SYNC) {
889 			bwrite(bp);
890 		} else {
891 			bdwrite(bp);
892 		}
893 	}
894 
895 	if (flags & B_METAONLY) {
896 		KASSERT(bpp != NULL);
897 		*bpp = bp;
898 		return (0);
899 	}
900 
901 	/*
902 	 * Get the data block, allocating if necessary.
903 	 */
904 
905 	if (nb == 0) {
906 		if (fscow_run(bp, true) != 0) {
907 			brelse(bp, 0);
908 			goto fail;
909 		}
910 		mutex_enter(&ump->um_lock);
911 		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
912 		    &bap[0]);
913 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
914 		    &newb);
915 		if (error) {
916 			brelse(bp, 0);
917 			goto fail;
918 		}
919 		nb = newb;
920 		*allocblk++ = nb;
921 		if (bpp != NULL) {
922 			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
923 			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
924 			if (error) {
925 				brelse(bp, 0);
926 				goto fail;
927 			}
928 		}
929 		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
930 		if (allocib == NULL && unwindidx < 0) {
931 			unwindidx = i - 1;
932 		}
933 
934 		/*
935 		 * If required, write synchronously, otherwise use
936 		 * delayed write.
937 		 */
938 
939 		if (flags & B_SYNC) {
940 			bwrite(bp);
941 		} else {
942 			bdwrite(bp);
943 		}
944 		return (0);
945 	}
946 	brelse(bp, 0);
947 	if (bpp != NULL) {
948 		if (flags & B_CLRBUF) {
949 			error = bread(vp, lbn, (int)fs->fs_bsize,
950 			    B_MODIFY, &nbp);
951 			if (error) {
952 				goto fail;
953 			}
954 		} else {
955 			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
956 			    fs->fs_bsize, true, &nbp);
957 			if (error)
958 				goto fail;
959 		}
960 		*bpp = nbp;
961 	}
962 	return (0);
963 
964 fail:
965 	/*
966 	 * If we have failed part way through block allocation, we
967 	 * have to deallocate any indirect blocks that we have allocated.
968 	 */
969 
970 	if (unwindidx >= 0) {
971 
972 		/*
973 		 * First write out any buffers we've created to resolve their
974 		 * softdeps.  This must be done in reverse order of creation
975 		 * so that we resolve the dependencies in one pass.
976 		 * Write the cylinder group buffers for these buffers too.
977 		 */
978 
979 		for (i = num; i >= unwindidx; i--) {
980 			if (i == 0) {
981 				break;
982 			}
983 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
984 			    fs->fs_bsize, false, &bp) != 0)
985 				continue;
986 			if (bp->b_oflags & BO_DELWRI) {
987 				nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
988 				    FFS_DBTOFSB(fs, bp->b_blkno))));
989 				bwrite(bp);
990 				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
991 				    fs->fs_cgsize, false, &bp) != 0)
992 					continue;
993 				if (bp->b_oflags & BO_DELWRI) {
994 					bwrite(bp);
995 				} else {
996 					brelse(bp, BC_INVAL);
997 				}
998 			} else {
999 				brelse(bp, BC_INVAL);
1000 			}
1001 		}
1002 
1003 		/*
1004 		 * Now that any dependencies that we created have been
1005 		 * resolved, we can undo the partial allocation.
1006 		 */
1007 
1008 		if (unwindidx == 0) {
1009 			*allocib = 0;
1010 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
1011 		} else {
1012 			int r;
1013 
1014 			r = bread(vp, indirs[unwindidx].in_lbn,
1015 			    (int)fs->fs_bsize, 0, &bp);
1016 			if (r) {
1017 				panic("Could not unwind indirect block, error %d", r);
1018 			} else {
1019 				bap = (int64_t *)bp->b_data;
1020 				bap[indirs[unwindidx].in_off] = 0;
1021 				bwrite(bp);
1022 			}
1023 		}
1024 		for (i = unwindidx + 1; i <= num; i++) {
1025 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
1026 			    fs->fs_bsize, false, &bp) == 0)
1027 				brelse(bp, BC_INVAL);
1028 		}
1029 	}
1030 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
1031 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
1032 		deallocated += fs->fs_bsize;
1033 	}
1034 	if (deallocated) {
1035 #if defined(QUOTA) || defined(QUOTA2)
1036 		/*
1037 		 * Restore user's disk quota because allocation failed.
1038 		 */
1039 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
1040 #endif
1041 		ip->i_ffs2_blocks -= btodb(deallocated);
1042 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
1043 	}
1044 
1045 	return (error);
1046 }
1047