/*	$NetBSD: ffs_balloc.c,v 1.66 2022/11/17 06:40:40 chs Exp $	*/

/*
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.66 2022/11/17 06:40:40 chs Exp $");

#if defined(_KERNEL_OPT)
#include "opt_quota.h"
#include "opt_uvmhist.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/file.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/kauth.h>
#include <sys/fstrans.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#ifdef UVMHIST
#include <uvm/uvm.h>
#endif
#include <uvm/uvm_extern.h>
#include <uvm/uvm_stat.h>

static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
    struct buf **);
static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
    struct buf **);

static daddr_t
ffs_extb(struct fs *fs, struct ufs2_dinode *dp, daddr_t nb)
{
	return ufs_rw64(dp->di_extb[nb], UFS_FSNEEDSWAP(fs));
}

/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 */
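/*
 * A minimal usage sketch (illustrative only; the vnode, uio, and
 * credential here are hypothetical, not taken from this file): a write
 * path that needs backing store for a byte range typically does
 *
 *	error = ffs_balloc(vp, uio->uio_offset, xfersize, cred,
 *	    B_CLRBUF, &bp);
 *
 * and on success receives a locked buffer covering the range. Passing
 * a NULL buffer pointer merely ensures that the underlying blocks
 * exist without returning a buffer.
 */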

int
ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
    struct buf **bpp)
{
	int error;

	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
		error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
	else
		error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);

	if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
		brelse(*bpp, 0);

	return error;
}

static int
ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[UFS_NIADDR + 2];
	daddr_t newb, pref, nb;
	int32_t *bap;	/* XXX ondisk32 */
	int deallocated, osize, nsize, num, i, error;
	int32_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
	int32_t *allocib;
	int unwindidx = -1;
	const int needswap = UFS_FSNEEDSWAP(fs);
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = ffs_lblkno(fs, off);
	size = ffs_blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp,
	    lbn, size, 0);

	if (lbn < 0)
		return (EFBIG);
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment,
	 * that fragment has to be extended to be a full block.
	 */
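	/*
	 * A worked example (the sizes are illustrative, not read from
	 * this fs): with 8KB blocks and 1KB fragments, a 5KB file stores
	 * its tail as a 5-fragment piece of its last block. Writing at
	 * lbn 2 first grows that piece to a full 8KB block via
	 * ffs_realloccg() below, since only the last block of a file is
	 * allowed to be a fragment.
	 */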

	lastlbn = ffs_lblkno(fs, ip->i_size);
	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = ffs_blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb, ffs_getdb(fs, ip, nb),
			    ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
				&ip->i_ffs1_db[0]),
			    osize, (int)fs->fs_bsize, flags, cred, bpp,
			    &newb);
			if (error)
				return (error);
			ip->i_size = ffs_lblktosize(fs, nb + 1);
			ip->i_ffs1_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp && *bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first UFS_NDADDR blocks are direct blocks
	 */
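	/*
	 * (A direct block's address lives in the inode itself, in
	 * i_ffs1_db[], so mapping lbn 0..UFS_NDADDR-1 needs no
	 * indirect-block I/O; with UFS_NDADDR of 12 and an assumed 8KB
	 * block size, the direct blocks cover the first 96KB of a file.)
	 */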

	if (lbn < UFS_NDADDR) {
		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize,
				    B_MODIFY, bpp);
				if (error) {
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
			nsize = ffs_fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize,
					    B_MODIFY, bpp);
					if (error) {
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_getdb(fs, ip, lbn),
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
					&ip->i_ffs1_db[0]),
				    osize, nsize, flags, cred, bpp, &newb);
				if (error)
					return (error);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
				nsize = ffs_fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
				&ip->i_ffs1_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
		}
		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */
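	/*
	 * A worked example (block size assumed, not read from this fs):
	 * a ufs1 indirect block holds fs_bsize / sizeof(int32_t)
	 * addresses, so with 8KB blocks each indirect block maps 2048
	 * further blocks. ufs_getlbns() returns num == 1, 2 or 3 for
	 * single-, double- or triple-indirect lbns, along with the path
	 * of indirect-block offsets in indirs[].
	 */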

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block, allocating it if necessary.
	 */

	--num;
	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		unwindidx = 0;
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
		if (error) {
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		/* Try to keep snapshot indirect blocks contiguous. */
		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
			    flags | B_METAONLY, &bap[0]);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
			    NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
		    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    B_MODIFY, &nbp);
			if (error) {
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */
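	/*
	 * (unwindidx records the shallowest level at which this call
	 * installed a new indirect-block pointer: 0 means in the inode's
	 * own indirect-pointer array, larger values index indirs[]; -1
	 * means there is nothing to undo.)
	 */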

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
				    FFS_DBTOFSB(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/*
		 * Undo the partial allocation.
		 */
		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, 0, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
			} else {
				bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#if defined(QUOTA) || defined(QUOTA2)
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}

static int
ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[UFS_NIADDR + 2];
	daddr_t newb, pref, nb;
	int64_t *bap;
	int deallocated, osize, nsize, num, i, error;
	daddr_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
	int64_t *allocib;
	int unwindidx = -1;
	const int needswap = UFS_FSNEEDSWAP(fs);
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	KASSERT((ump->um_flags & UFS_EA) != 0 || (flags & IO_EXT) == 0);

	lbn = ffs_lblkno(fs, off);
	size = ffs_blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp,
	    lbn, size, 0);

	if (lbn < 0)
		return (EFBIG);

	/*
	 * Check for allocating external data.
	 */
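	/*
	 * (External data, the UFS2 extended-attribute area, lives in up
	 * to UFS_NXADDR blocks addressed by di_extb[]; below it is read
	 * and written through negative logical block numbers, -1 - lbn,
	 * to keep it distinct from the file's ordinary data blocks.)
	 */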
	if (flags & IO_EXT) {
		struct ufs2_dinode *dp = ip->i_din.ffs2_din;
		if (lbn >= UFS_NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment,
		 * that fragment has to be extended to be a full block.
		 */
		lastlbn = ffs_lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = ffs_sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - nb,
				    ffs_extb(fs, dp, nb),
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
					flags, &dp->di_extb[0]),
				    osize, (int)fs->fs_bsize, flags, cred,
				    &bp, &newb);
				if (error)
					return (error);
				dp->di_extsize = ffs_lblktosize(fs, nb + 1);
				dp->di_extb[nb] = FFS_DBTOFSB(fs, bp->b_blkno);
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= ffs_lblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize,
			    0, &bp);
			if (error) {
				return (error);
			}
			mutex_enter(bp->b_objlock);
			bp->b_blkno = FFS_FSBTODB(fs, nb);
			mutex_exit(bp->b_objlock);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = ffs_fragroundup(fs, ffs_blkoff(fs, dp->di_extsize));
			nsize = ffs_fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, -1 - lbn, osize,
				    0, &bp);
				if (error) {
					return (error);
				}
				mutex_enter(bp->b_objlock);
				bp->b_blkno = FFS_FSBTODB(fs, nb);
				mutex_exit(bp->b_objlock);
			} else {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - lbn,
				    ffs_extb(fs, dp, lbn),
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
					&dp->di_extb[0]),
				    osize, nsize, flags, cred, &bp, &newb);
				if (error)
					return (error);
			}
		} else {
			if (dp->di_extsize < ffs_lblktosize(fs, lbn + 1))
				nsize = ffs_fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
				&dp->di_extb[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			error = ffs_getblk(vp, -1 - lbn, FFS_FSBTODB(fs, newb),
			    nsize, (flags & B_CLRBUF) != 0, &bp);
			if (error)
				return error;
		}
		dp->di_extb[lbn] = FFS_DBTOFSB(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment,
	 * that fragment has to be extended to be a full block.
	 */

	lastlbn = ffs_lblkno(fs, ip->i_size);
	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = ffs_blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb, ffs_getdb(fs, ip, nb),
			    ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
				&ip->i_ffs2_db[0]),
			    osize, (int)fs->fs_bsize, flags, cred, bpp,
			    &newb);
			if (error)
				return (error);
			ip->i_size = ffs_lblktosize(fs, nb + 1);
			ip->i_ffs2_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_size);
			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first UFS_NDADDR blocks are direct blocks
	 */

	if (lbn < UFS_NDADDR) {
		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize,
				    B_MODIFY, bpp);
				if (error) {
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
			nsize = ffs_fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize,
					    B_MODIFY, bpp);
					if (error) {
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_getdb(fs, ip, lbn),
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
					&ip->i_ffs2_db[0]),
				    osize, nsize, flags, cred, bpp, &newb);
				if (error)
					return (error);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
				nsize = ffs_fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
				&ip->i_ffs2_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
		}
		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */
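	/*
	 * (Same scheme as the ufs1 case above, but ufs2 indirect blocks
	 * hold 64-bit addresses, so each level maps fs_bsize /
	 * sizeof(int64_t) blocks; with the 8KB block size assumed
	 * earlier, that is 1024 entries per indirect block.)
	 */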

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block, allocating it if necessary.
	 */

	--num;
	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		unwindidx = 0;
		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
		*allocib = ufs_rw64(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
		if (error) {
			goto fail;
		}
		bap = (int64_t *)bp->b_data;
		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		/* Try to keep snapshot indirect blocks contiguous. */
		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
			    flags | B_METAONLY, &bap[0]);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
			    NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
		    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    B_MODIFY, &nbp);
			if (error) {
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
				    FFS_DBTOFSB(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */

		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, 0, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
			} else {
				bap = (int64_t *)bp->b_data;
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#if defined(QUOTA) || defined(QUOTA2)
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs2_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	return (error);
}