xref: /netbsd-src/sys/ufs/ext2fs/ext2fs_balloc.c (revision 9fbd88883c38d0c0fbfcbe66d76fe6b0fab3f9de)
1 /*	$NetBSD: ext2fs_balloc.c,v 1.16 2002/01/26 08:32:06 chs Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Manuel Bouyer.
5  * Copyright (c) 1982, 1986, 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)ffs_balloc.c	8.4 (Berkeley) 9/23/93
37  * Modified for ext2fs by Manuel Bouyer.
38  */
39 
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: ext2fs_balloc.c,v 1.16 2002/01/26 08:32:06 chs Exp $");
42 
43 #if defined(_KERNEL_OPT)
44 #include "opt_uvmhist.h"
45 #endif
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/buf.h>
50 #include <sys/proc.h>
51 #include <sys/file.h>
52 #include <sys/vnode.h>
53 #include <sys/mount.h>
54 
55 #include <uvm/uvm.h>
56 
57 #include <ufs/ufs/inode.h>
58 #include <ufs/ufs/ufs_extern.h>
59 
60 #include <ufs/ext2fs/ext2fs.h>
61 #include <ufs/ext2fs/ext2fs_extern.h>
62 
63 /*
64  * Balloc defines the structure of file system storage
65  * by allocating the physical blocks on a device given
66  * the inode and the logical block number in a file.
67  */
68 int
69 ext2fs_balloc(ip, bn, size, cred, bpp, flags)
70 	struct inode *ip;
71 	ufs_daddr_t bn;
72 	int size;
73 	struct ucred *cred;
74 	struct buf **bpp;
75 	int flags;
76 {
77 	struct m_ext2fs *fs;
78 	ufs_daddr_t nb;
79 	struct buf *bp, *nbp;
80 	struct vnode *vp = ITOV(ip);
81 	struct indir indirs[NIADDR + 2];
82 	ufs_daddr_t newb, lbn, *bap, pref;
83 	int num, i, error;
84 	u_int deallocated;
85 	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
86 	int unwindidx = -1;
87 	UVMHIST_FUNC("ext2fs_balloc"); UVMHIST_CALLED(ubchist);
88 
89 	UVMHIST_LOG(ubchist, "bn 0x%x", bn,0,0,0);
90 
91 	if (bpp != NULL) {
92 		*bpp = NULL;
93 	}
94 	if (bn < 0)
95 		return (EFBIG);
96 	fs = ip->i_e2fs;
97 	lbn = bn;
98 
99 	/*
100 	 * The first NDADDR blocks are direct blocks
101 	 */
102 	if (bn < NDADDR) {
103 		nb = fs2h32(ip->i_e2fs_blocks[bn]);
104 		if (nb != 0) {
105 
106 			/*
107 			 * the block is already allocated, just read it.
108 			 */
109 
110 			if (bpp != NULL) {
111 				error = bread(vp, bn, fs->e2fs_bsize, NOCRED,
112 					      &bp);
113 				if (error) {
114 					brelse(bp);
115 					return (error);
116 				}
117 				*bpp = bp;
118 			}
119 			return (0);
120 		}
121 
122 		/*
123 		 * allocate a new direct block.
124 		 */
125 
126 		error = ext2fs_alloc(ip, bn,
127 		    ext2fs_blkpref(ip, bn, bn, &ip->i_e2fs_blocks[0]),
128 		    cred, &newb);
129 		if (error)
130 			return (error);
131 		ip->i_e2fs_last_lblk = lbn;
132 		ip->i_e2fs_last_blk = newb;
133 		ip->i_e2fs_blocks[bn] = h2fs32(newb);
134 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
135 		if (bpp != NULL) {
136 			bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0);
137 			bp->b_blkno = fsbtodb(fs, newb);
138 			if (flags & B_CLRBUF)
139 				clrbuf(bp);
140 			*bpp = bp;
141 		}
142 		return (0);
143 	}
144 	/*
145 	 * Determine the number of levels of indirection.
146 	 */
147 	pref = 0;
148 	if ((error = ufs_getlbns(vp, bn, indirs, &num)) != 0)
149 		return(error);
150 #ifdef DIAGNOSTIC
151 	if (num < 1)
152 		panic ("ext2fs_balloc: ufs_getlbns returned indirect block\n");
153 #endif
154 	/*
155 	 * Fetch the first indirect block allocating if necessary.
156 	 */
157 	--num;
158 	nb = fs2h32(ip->i_e2fs_blocks[NDADDR + indirs[0].in_off]);
159 	allocib = NULL;
160 	allocblk = allociblk;
161 	if (nb == 0) {
162 		pref = ext2fs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
163 		error = ext2fs_alloc(ip, lbn, pref, cred, &newb);
164 		if (error)
165 			return (error);
166 		nb = newb;
167 		*allocblk++ = nb;
168 		ip->i_e2fs_last_blk = newb;
169 		bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0);
170 		bp->b_blkno = fsbtodb(fs, newb);
171 		clrbuf(bp);
172 		/*
173 		 * Write synchronously so that indirect blocks
174 		 * never point at garbage.
175 		 */
176 		if ((error = bwrite(bp)) != 0)
177 			goto fail;
178 		unwindidx = 0;
179 		allocib = &ip->i_e2fs_blocks[NDADDR + indirs[0].in_off];
180 		*allocib = h2fs32(newb);
181 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
182 	}
183 	/*
184 	 * Fetch through the indirect blocks, allocating as necessary.
185 	 */
186 	for (i = 1;;) {
187 		error = bread(vp,
188 		    indirs[i].in_lbn, (int)fs->e2fs_bsize, NOCRED, &bp);
189 		if (error) {
190 			brelse(bp);
191 			goto fail;
192 		}
193 		bap = (ufs_daddr_t *)bp->b_data;
194 		nb = fs2h32(bap[indirs[i].in_off]);
195 		if (i == num)
196 			break;
197 		i++;
198 		if (nb != 0) {
199 			brelse(bp);
200 			continue;
201 		}
202 		pref = ext2fs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
203 		error = ext2fs_alloc(ip, lbn, pref, cred, &newb);
204 		if (error) {
205 			brelse(bp);
206 			goto fail;
207 		}
208 		nb = newb;
209 		*allocblk++ = nb;
210 		ip->i_e2fs_last_blk = newb;
211 		nbp = getblk(vp, indirs[i].in_lbn, fs->e2fs_bsize, 0, 0);
212 		nbp->b_blkno = fsbtodb(fs, nb);
213 		clrbuf(nbp);
214 		/*
215 		 * Write synchronously so that indirect blocks
216 		 * never point at garbage.
217 		 */
218 		if ((error = bwrite(nbp)) != 0) {
219 			brelse(bp);
220 			goto fail;
221 		}
222 		if (unwindidx < 0)
223 			unwindidx = i - 1;
224 		bap[indirs[i - 1].in_off] = h2fs32(nb);
225 		/*
226 		 * If required, write synchronously, otherwise use
227 		 * delayed write.
228 		 */
229 		if (flags & B_SYNC) {
230 			bwrite(bp);
231 		} else {
232 			bdwrite(bp);
233 		}
234 	}
235 	/*
236 	 * Get the data block, allocating if necessary.
237 	 */
238 	if (nb == 0) {
239 		pref = ext2fs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]);
240 		error = ext2fs_alloc(ip, lbn, pref, cred, &newb);
241 		if (error) {
242 			brelse(bp);
243 			goto fail;
244 		}
245 		nb = newb;
246 		*allocblk++ = nb;
247 		ip->i_e2fs_last_lblk = lbn;
248 		ip->i_e2fs_last_blk = newb;
249 		bap[indirs[num].in_off] = h2fs32(nb);
250 		/*
251 		 * If required, write synchronously, otherwise use
252 		 * delayed write.
253 		 */
254 		if (flags & B_SYNC) {
255 			bwrite(bp);
256 		} else {
257 			bdwrite(bp);
258 		}
259 		if (bpp != NULL) {
260 			nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
261 			nbp->b_blkno = fsbtodb(fs, nb);
262 			if (flags & B_CLRBUF)
263 				clrbuf(nbp);
264 			*bpp = nbp;
265 		}
266 		return (0);
267 	}
268 	brelse(bp);
269 	if (bpp != NULL) {
270 		if (flags & B_CLRBUF) {
271 			error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED,
272 				      &nbp);
273 			if (error) {
274 				brelse(nbp);
275 				goto fail;
276 			}
277 		} else {
278 			nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
279 			nbp->b_blkno = fsbtodb(fs, nb);
280 		}
281 		*bpp = nbp;
282 	}
283 	return (0);
284 fail:
285 	/*
286 	 * If we have failed part way through block allocation, we
287 	 * have to deallocate any indirect blocks that we have allocated.
288 	 */
289 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
290 		ext2fs_blkfree(ip, *blkp);
291 		deallocated += fs->e2fs_bsize;
292 	}
293 	if (unwindidx >= 0) {
294 		if (unwindidx == 0) {
295 			*allocib = 0;
296 		} else {
297 			int r;
298 
299 			r = bread(vp, indirs[unwindidx].in_lbn,
300 			    (int)fs->e2fs_bsize, NOCRED, &bp);
301 			if (r) {
302 				panic("Could not unwind indirect block, error %d", r);
303 				brelse(bp);
304 			} else {
305 				bap = (ufs_daddr_t *)bp->b_data;
306 				bap[indirs[unwindidx].in_off] = 0;
307 				if (flags & B_SYNC)
308 					bwrite(bp);
309 				else
310 					bdwrite(bp);
311 			}
312 		}
313 		for (i = unwindidx + 1; i <= num; i++) {
314 			bp = getblk(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize,
315 			    0, 0);
316 			bp->b_flags |= B_INVAL;
317 			brelse(bp);
318 		}
319 	}
320 	if (deallocated) {
321 		ip->i_e2fs_nblock -= btodb(deallocated);
322 		ip->i_e2fs_flags |= IN_CHANGE | IN_UPDATE;
323 	}
324 	return error;
325 }
326 
327 int
328 ext2fs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
329     struct ucred *cred)
330 {
331 	struct inode *ip = VTOI(vp);
332 	struct m_ext2fs *fs = ip->i_e2fs;
333 	int error, delta, bshift, bsize;
334 	UVMHIST_FUNC("ext2fs_gop_alloc"); UVMHIST_CALLED(ubchist);
335 
336 	bshift = fs->e2fs_bshift;
337 	bsize = 1 << bshift;
338 
339 	delta = off & (bsize - 1);
340 	off -= delta;
341 	len += delta;
342 
343 	while (len > 0) {
344 		bsize = min(bsize, len);
345 		UVMHIST_LOG(ubchist, "off 0x%x len 0x%x bsize 0x%x",
346 			    off, len, bsize, 0);
347 
348 		error = ext2fs_balloc(ip, lblkno(fs, off), bsize, cred,
349 		    NULL, flags);
350 		if (error) {
351 			UVMHIST_LOG(ubchist, "error %d", error, 0,0,0);
352 			return error;
353 		}
354 
355 		/*
356 		 * increase file size now, VOP_BALLOC() requires that
357 		 * EOF be up-to-date before each call.
358 		 */
359 
360 		if (ip->i_e2fs_size < off + bsize) {
361 			UVMHIST_LOG(ubchist, "old 0x%x new 0x%x",
362 				    ip->i_e2fs_size, off + bsize,0,0);
363 			ip->i_e2fs_size = off + bsize;
364 		}
365 
366 		off += bsize;
367 		len -= bsize;
368 	}
369 	return 0;
370 }
371 
372 /*
373  * allocate a range of blocks in a file.
374  * after this function returns, any page entirely contained within the range
375  * will map to invalid data and thus must be overwritten before it is made
376  * accessible to others.
377  */
378 
379 int
380 ext2fs_balloc_range(vp, off, len, cred, flags)
381 	struct vnode *vp;
382 	off_t off, len;
383 	struct ucred *cred;
384 	int flags;
385 {
386 	off_t oldeof, eof, pagestart;
387 	struct genfs_node *gp = VTOG(vp);
388 	int i, delta, error, npages;
389 	int bshift = vp->v_mount->mnt_fs_bshift;
390 	int bsize = 1 << bshift;
391 	int ppb = max(bsize >> PAGE_SHIFT, 1);
392 	struct vm_page *pgs[ppb];
393 	UVMHIST_FUNC("ext2fs_balloc_range"); UVMHIST_CALLED(ubchist);
394 	UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x",
395 		    vp, off, len, vp->v_size);
396 
397 	oldeof = vp->v_size;
398 	eof = MAX(oldeof, off + len);
399 	UVMHIST_LOG(ubchist, "new eof 0x%x", eof,0,0,0);
400 	pgs[0] = NULL;
401 
402 	/*
403 	 * cache the new range of the file.  this will create zeroed pages
404 	 * where the new block will be and keep them locked until the
405 	 * new block is allocated, so there will be no window where
406 	 * the old contents of the new block is visible to racing threads.
407 	 */
408 
409 	pagestart = trunc_page(off) & ~(bsize - 1);
410 	npages = MIN(ppb, (round_page(eof) - pagestart) >> PAGE_SHIFT);
411 	memset(pgs, 0, npages);
412 	simple_lock(&vp->v_interlock);
413 	error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0,
414 	    VM_PROT_READ, 0, PGO_SYNCIO | PGO_PASTEOF);
415 	if (error) {
416 		UVMHIST_LOG(ubchist, "getpages %d", error,0,0,0);
417 		return error;
418 	}
419 	for (i = 0; i < npages; i++) {
420 		UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0);
421 		KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
422 		pgs[i]->flags &= ~PG_CLEAN;
423 		uvm_pageactivate(pgs[i]);
424 	}
425 
426 	/*
427 	 * adjust off to be block-aligned.
428 	 */
429 
430 	delta = off & (bsize - 1);
431 	off -= delta;
432 	len += delta;
433 
434 	/*
435 	 * now allocate the range.
436 	 */
437 
438 	lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
439 	error = GOP_ALLOC(vp, off, len, flags, cred);
440 	UVMHIST_LOG(ubchist, "alloc %d", error,0,0,0);
441 	lockmgr(&gp->g_glock, LK_RELEASE, NULL);
442 
443 	/*
444 	 * clear PG_RDONLY on any pages we are holding
445 	 * (since they now have backing store) and unbusy them.
446 	 * if we got an error, free any pages we created past the old eob.
447 	 */
448 
449 	simple_lock(&vp->v_interlock);
450 	for (i = 0; i < npages; i++) {
451 		pgs[i]->flags &= ~PG_RDONLY;
452 		if (error) {
453 			pgs[i]->flags |= PG_RELEASED;
454 		}
455 	}
456 	if (error) {
457 		uvm_lock_pageq();
458 		uvm_page_unbusy(pgs, npages);
459 		uvm_unlock_pageq();
460 	} else {
461 		uvm_page_unbusy(pgs, npages);
462 	}
463 	simple_unlock(&vp->v_interlock);
464 	return (error);
465 }
466