xref: /netbsd-src/sys/ufs/lfs/ulfs_bmap.c (revision 413d532bcc3f62d122e56d92e13ac64825a40baf)
1 /*	$NetBSD: ulfs_bmap.c,v 1.5 2013/07/28 01:10:49 dholland Exp $	*/
2 /*  from NetBSD: ufs_bmap.c,v 1.50 2013/01/22 09:39:18 dholland Exp  */
3 
4 /*
5  * Copyright (c) 1989, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)ufs_bmap.c	8.8 (Berkeley) 8/11/95
38  */
39 
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: ulfs_bmap.c,v 1.5 2013/07/28 01:10:49 dholland Exp $");
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/stat.h>
46 #include <sys/buf.h>
47 #include <sys/proc.h>
48 #include <sys/vnode.h>
49 #include <sys/mount.h>
50 #include <sys/resourcevar.h>
51 #include <sys/trace.h>
52 #include <sys/fstrans.h>
53 
54 #include <miscfs/specfs/specdev.h>
55 
56 #include <ufs/lfs/ulfs_inode.h>
57 #include <ufs/lfs/ulfsmount.h>
58 #include <ufs/lfs/ulfs_extern.h>
59 #include <ufs/lfs/ulfs_bswap.h>
60 
61 static bool
62 ulfs_issequential(const struct lfs *fs, daddr_t daddr0, daddr_t daddr1)
63 {
64 
65 	/* for ulfs, blocks in a hole is not 'contiguous'. */
66 	if (daddr0 == 0)
67 		return false;
68 
69 	return (daddr0 + fs->um_seqinc == daddr1);
70 }
71 
72 /*
73  * Bmap converts the logical block number of a file to its physical block
74  * number on the disk. The conversion is done by using the logical block
75  * number to index into the array of block pointers described by the dinode.
76  */
77 int
78 ulfs_bmap(void *v)
79 {
80 	struct vop_bmap_args /* {
81 		struct vnode *a_vp;
82 		daddr_t  a_bn;
83 		struct vnode **a_vpp;
84 		daddr_t *a_bnp;
85 		int *a_runp;
86 	} */ *ap = v;
87 	int error;
88 
89 	/*
90 	 * Check for underlying vnode requests and ensure that logical
91 	 * to physical mapping is requested.
92 	 */
93 	if (ap->a_vpp != NULL)
94 		*ap->a_vpp = VTOI(ap->a_vp)->i_devvp;
95 	if (ap->a_bnp == NULL)
96 		return (0);
97 
98 	fstrans_start(ap->a_vp->v_mount, FSTRANS_SHARED);
99 	error = ulfs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL, NULL,
100 	    ap->a_runp, ulfs_issequential);
101 	fstrans_done(ap->a_vp->v_mount);
102 	return error;
103 }
104 
105 /*
106  * Indirect blocks are now on the vnode for the file.  They are given negative
107  * logical block numbers.  Indirect blocks are addressed by the negative
108  * address of the first data block to which they point.  Double indirect blocks
109  * are addressed by one less than the address of the first indirect block to
110  * which they point.  Triple indirect blocks are addressed by one less than
111  * the address of the first double indirect block to which they point.
112  *
113  * ulfs_bmaparray does the bmap conversion, and if requested returns the
114  * array of logical blocks which must be traversed to get to a block.
115  * Each entry contains the offset into that block that gets you to the
116  * next block and the disk address of the block (if it is assigned).
117  */
118 
119 int
120 ulfs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap,
121     int *nump, int *runp, ulfs_issequential_callback_t is_sequential)
122 {
123 	struct inode *ip;
124 	struct buf *bp, *cbp;
125 	struct ulfsmount *ump;
126 	struct lfs *fs;
127 	struct mount *mp;
128 	struct indir a[ULFS_NIADDR + 1], *xap;
129 	daddr_t daddr;
130 	daddr_t metalbn;
131 	int error, maxrun = 0, num;
132 
133 	ip = VTOI(vp);
134 	mp = vp->v_mount;
135 	ump = ip->i_ump;
136 	fs = ip->i_lfs;
137 #ifdef DIAGNOSTIC
138 	if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL))
139 		panic("ulfs_bmaparray: invalid arguments");
140 #endif
141 
142 	if (runp) {
143 		/*
144 		 * XXX
145 		 * If MAXBSIZE is the largest transfer the disks can handle,
146 		 * we probably want maxrun to be 1 block less so that we
147 		 * don't create a block larger than the device can handle.
148 		 */
149 		*runp = 0;
150 		maxrun = MAXPHYS / mp->mnt_stat.f_iosize - 1;
151 	}
152 
153 	if (bn >= 0 && bn < ULFS_NDADDR) {
154 		if (nump != NULL)
155 			*nump = 0;
156 		if (ump->um_fstype == ULFS1)
157 			daddr = ulfs_rw32(ip->i_ffs1_db[bn],
158 			    ULFS_MPNEEDSWAP(fs));
159 		else
160 			daddr = ulfs_rw64(ip->i_ffs2_db[bn],
161 			    ULFS_MPNEEDSWAP(fs));
162 		*bnp = blkptrtodb(fs, daddr);
163 		/*
164 		 * Since this is FFS independent code, we are out of
165 		 * scope for the definitions of BLK_NOCOPY and
166 		 * BLK_SNAP, but we do know that they will fall in
167 		 * the range 1..um_seqinc, so we use that test and
168 		 * return a request for a zeroed out buffer if attempts
169 		 * are made to read a BLK_NOCOPY or BLK_SNAP block.
170 		 */
171 		if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT
172 		    && daddr > 0 &&
173 		    daddr < fs->um_seqinc) {
174 			*bnp = -1;
175 		} else if (*bnp == 0) {
176 			if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL))
177 			    == SF_SNAPSHOT) {
178 				*bnp = blkptrtodb(fs, bn * fs->um_seqinc);
179 			} else {
180 				*bnp = -1;
181 			}
182 		} else if (runp) {
183 			if (ump->um_fstype == ULFS1) {
184 				for (++bn; bn < ULFS_NDADDR && *runp < maxrun &&
185 				    is_sequential(fs,
186 				        ulfs_rw32(ip->i_ffs1_db[bn - 1],
187 				            ULFS_MPNEEDSWAP(fs)),
188 				        ulfs_rw32(ip->i_ffs1_db[bn],
189 				            ULFS_MPNEEDSWAP(fs)));
190 				    ++bn, ++*runp);
191 			} else {
192 				for (++bn; bn < ULFS_NDADDR && *runp < maxrun &&
193 				    is_sequential(fs,
194 				        ulfs_rw64(ip->i_ffs2_db[bn - 1],
195 				            ULFS_MPNEEDSWAP(fs)),
196 				        ulfs_rw64(ip->i_ffs2_db[bn],
197 				            ULFS_MPNEEDSWAP(fs)));
198 				    ++bn, ++*runp);
199 			}
200 		}
201 		return (0);
202 	}
203 
204 	xap = ap == NULL ? a : ap;
205 	if (!nump)
206 		nump = &num;
207 	if ((error = ulfs_getlbns(vp, bn, xap, nump)) != 0)
208 		return (error);
209 
210 	num = *nump;
211 
212 	/* Get disk address out of indirect block array */
213 	if (ump->um_fstype == ULFS1)
214 		daddr = ulfs_rw32(ip->i_ffs1_ib[xap->in_off],
215 		    ULFS_MPNEEDSWAP(fs));
216 	else
217 		daddr = ulfs_rw64(ip->i_ffs2_ib[xap->in_off],
218 		    ULFS_MPNEEDSWAP(fs));
219 
220 	for (bp = NULL, ++xap; --num; ++xap) {
221 		/*
222 		 * Exit the loop if there is no disk address assigned yet and
223 		 * the indirect block isn't in the cache, or if we were
224 		 * looking for an indirect block and we've found it.
225 		 */
226 
227 		metalbn = xap->in_lbn;
228 		if (metalbn == bn)
229 			break;
230 		if (daddr == 0) {
231 			mutex_enter(&bufcache_lock);
232 			cbp = incore(vp, metalbn);
233 			mutex_exit(&bufcache_lock);
234 			if (cbp == NULL)
235 				break;
236 		}
237 
238 		/*
239 		 * If we get here, we've either got the block in the cache
240 		 * or we have a disk address for it, go fetch it.
241 		 */
242 		if (bp)
243 			brelse(bp, 0);
244 
245 		xap->in_exists = 1;
246 		bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0);
247 		if (bp == NULL) {
248 
249 			/*
250 			 * getblk() above returns NULL only iff we are
251 			 * pagedaemon.  See the implementation of getblk
252 			 * for detail.
253 			 */
254 
255 			return (ENOMEM);
256 		}
257 		if (bp->b_oflags & (BO_DONE | BO_DELWRI)) {
258 			trace(TR_BREADHIT, pack(vp, size), metalbn);
259 		}
260 #ifdef DIAGNOSTIC
261 		else if (!daddr)
262 			panic("ulfs_bmaparray: indirect block not in cache");
263 #endif
264 		else {
265 			trace(TR_BREADMISS, pack(vp, size), metalbn);
266 			bp->b_blkno = blkptrtodb(fs, daddr);
267 			bp->b_flags |= B_READ;
268 			BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
269 			VOP_STRATEGY(vp, bp);
270 			curlwp->l_ru.ru_inblock++;	/* XXX */
271 			if ((error = biowait(bp)) != 0) {
272 				brelse(bp, 0);
273 				return (error);
274 			}
275 		}
276 		if (ump->um_fstype == ULFS1) {
277 			daddr = ulfs_rw32(((u_int32_t *)bp->b_data)[xap->in_off],
278 			    ULFS_MPNEEDSWAP(fs));
279 			if (num == 1 && daddr && runp) {
280 				for (bn = xap->in_off + 1;
281 				    bn < MNINDIR(fs) && *runp < maxrun &&
282 				    is_sequential(fs,
283 				        ulfs_rw32(((int32_t *)bp->b_data)[bn-1],
284 				            ULFS_MPNEEDSWAP(fs)),
285 				        ulfs_rw32(((int32_t *)bp->b_data)[bn],
286 				            ULFS_MPNEEDSWAP(fs)));
287 				    ++bn, ++*runp);
288 			}
289 		} else {
290 			daddr = ulfs_rw64(((u_int64_t *)bp->b_data)[xap->in_off],
291 			    ULFS_MPNEEDSWAP(fs));
292 			if (num == 1 && daddr && runp) {
293 				for (bn = xap->in_off + 1;
294 				    bn < MNINDIR(fs) && *runp < maxrun &&
295 				    is_sequential(fs,
296 				        ulfs_rw64(((int64_t *)bp->b_data)[bn-1],
297 				            ULFS_MPNEEDSWAP(fs)),
298 				        ulfs_rw64(((int64_t *)bp->b_data)[bn],
299 				            ULFS_MPNEEDSWAP(fs)));
300 				    ++bn, ++*runp);
301 			}
302 		}
303 	}
304 	if (bp)
305 		brelse(bp, 0);
306 
307 	/*
308 	 * Since this is FFS independent code, we are out of scope for the
309 	 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they
310 	 * will fall in the range 1..um_seqinc, so we use that test and
311 	 * return a request for a zeroed out buffer if attempts are made
312 	 * to read a BLK_NOCOPY or BLK_SNAP block.
313 	 */
314 	if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT
315 	    && daddr > 0 && daddr < fs->um_seqinc) {
316 		*bnp = -1;
317 		return (0);
318 	}
319 	*bnp = blkptrtodb(fs, daddr);
320 	if (*bnp == 0) {
321 		if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL))
322 		    == SF_SNAPSHOT) {
323 			*bnp = blkptrtodb(fs, bn * fs->um_seqinc);
324 		} else {
325 			*bnp = -1;
326 		}
327 	}
328 	return (0);
329 }
330 
331 /*
332  * Create an array of logical block number/offset pairs which represent the
333  * path of indirect blocks required to access a data block.  The first "pair"
334  * contains the logical block number of the appropriate single, double or
335  * triple indirect block and the offset into the inode indirect block array.
336  * Note, the logical block number of the inode single/double/triple indirect
337  * block appears twice in the array, once with the offset into the i_ffs1_ib and
338  * once with the offset into the page itself.
339  */
340 int
341 ulfs_getlbns(struct vnode *vp, daddr_t bn, struct indir *ap, int *nump)
342 {
343 	daddr_t metalbn, realbn;
344 	struct ulfsmount *ump;
345 	struct lfs *fs;
346 	int64_t blockcnt;
347 	int lbc;
348 	int i, numlevels, off;
349 
350 	ump = VFSTOULFS(vp->v_mount);
351 	fs = ump->um_lfs;
352 	if (nump)
353 		*nump = 0;
354 	numlevels = 0;
355 	realbn = bn;
356 	if (bn < 0)
357 		bn = -bn;
358 	KASSERT(bn >= ULFS_NDADDR);
359 
360 	/*
361 	 * Determine the number of levels of indirection.  After this loop
362 	 * is done, blockcnt indicates the number of data blocks possible
363 	 * at the given level of indirection, and ULFS_NIADDR - i is the number
364 	 * of levels of indirection needed to locate the requested block.
365 	 */
366 
367 	bn -= ULFS_NDADDR;
368 	for (lbc = 0, i = ULFS_NIADDR;; i--, bn -= blockcnt) {
369 		if (i == 0)
370 			return (EFBIG);
371 
372 		lbc += fs->um_lognindir;
373 		blockcnt = (int64_t)1 << lbc;
374 
375 		if (bn < blockcnt)
376 			break;
377 	}
378 
379 	/* Calculate the address of the first meta-block. */
380 	metalbn = -((realbn >= 0 ? realbn : -realbn) - bn + ULFS_NIADDR - i);
381 
382 	/*
383 	 * At each iteration, off is the offset into the bap array which is
384 	 * an array of disk addresses at the current level of indirection.
385 	 * The logical block number and the offset in that block are stored
386 	 * into the argument array.
387 	 */
388 	ap->in_lbn = metalbn;
389 	ap->in_off = off = ULFS_NIADDR - i;
390 	ap->in_exists = 0;
391 	ap++;
392 	for (++numlevels; i <= ULFS_NIADDR; i++) {
393 		/* If searching for a meta-data block, quit when found. */
394 		if (metalbn == realbn)
395 			break;
396 
397 		lbc -= fs->um_lognindir;
398 		off = (bn >> lbc) & (MNINDIR(fs) - 1);
399 
400 		++numlevels;
401 		ap->in_lbn = metalbn;
402 		ap->in_off = off;
403 		ap->in_exists = 0;
404 		++ap;
405 
406 		metalbn -= -1 + ((int64_t)off << lbc);
407 	}
408 	if (nump)
409 		*nump = numlevels;
410 	return (0);
411 }
412