1 /* $NetBSD: ulfs_bmap.c,v 1.5 2013/07/28 01:10:49 dholland Exp $ */ 2 /* from NetBSD: ufs_bmap.c,v 1.50 2013/01/22 09:39:18 dholland Exp */ 3 4 /* 5 * Copyright (c) 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ufs_bmap.c 8.8 (Berkeley) 8/11/95 38 */ 39 40 #include <sys/cdefs.h> 41 __KERNEL_RCSID(0, "$NetBSD: ulfs_bmap.c,v 1.5 2013/07/28 01:10:49 dholland Exp $"); 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/stat.h> 46 #include <sys/buf.h> 47 #include <sys/proc.h> 48 #include <sys/vnode.h> 49 #include <sys/mount.h> 50 #include <sys/resourcevar.h> 51 #include <sys/trace.h> 52 #include <sys/fstrans.h> 53 54 #include <miscfs/specfs/specdev.h> 55 56 #include <ufs/lfs/ulfs_inode.h> 57 #include <ufs/lfs/ulfsmount.h> 58 #include <ufs/lfs/ulfs_extern.h> 59 #include <ufs/lfs/ulfs_bswap.h> 60 61 static bool 62 ulfs_issequential(const struct lfs *fs, daddr_t daddr0, daddr_t daddr1) 63 { 64 65 /* for ulfs, blocks in a hole is not 'contiguous'. */ 66 if (daddr0 == 0) 67 return false; 68 69 return (daddr0 + fs->um_seqinc == daddr1); 70 } 71 72 /* 73 * Bmap converts the logical block number of a file to its physical block 74 * number on the disk. The conversion is done by using the logical block 75 * number to index into the array of block pointers described by the dinode. 76 */ 77 int 78 ulfs_bmap(void *v) 79 { 80 struct vop_bmap_args /* { 81 struct vnode *a_vp; 82 daddr_t a_bn; 83 struct vnode **a_vpp; 84 daddr_t *a_bnp; 85 int *a_runp; 86 } */ *ap = v; 87 int error; 88 89 /* 90 * Check for underlying vnode requests and ensure that logical 91 * to physical mapping is requested. 92 */ 93 if (ap->a_vpp != NULL) 94 *ap->a_vpp = VTOI(ap->a_vp)->i_devvp; 95 if (ap->a_bnp == NULL) 96 return (0); 97 98 fstrans_start(ap->a_vp->v_mount, FSTRANS_SHARED); 99 error = ulfs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL, NULL, 100 ap->a_runp, ulfs_issequential); 101 fstrans_done(ap->a_vp->v_mount); 102 return error; 103 } 104 105 /* 106 * Indirect blocks are now on the vnode for the file. They are given negative 107 * logical block numbers. Indirect blocks are addressed by the negative 108 * address of the first data block to which they point. Double indirect blocks 109 * are addressed by one less than the address of the first indirect block to 110 * which they point. Triple indirect blocks are addressed by one less than 111 * the address of the first double indirect block to which they point. 112 * 113 * ulfs_bmaparray does the bmap conversion, and if requested returns the 114 * array of logical blocks which must be traversed to get to a block. 115 * Each entry contains the offset into that block that gets you to the 116 * next block and the disk address of the block (if it is assigned). 117 */ 118 119 int 120 ulfs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap, 121 int *nump, int *runp, ulfs_issequential_callback_t is_sequential) 122 { 123 struct inode *ip; 124 struct buf *bp, *cbp; 125 struct ulfsmount *ump; 126 struct lfs *fs; 127 struct mount *mp; 128 struct indir a[ULFS_NIADDR + 1], *xap; 129 daddr_t daddr; 130 daddr_t metalbn; 131 int error, maxrun = 0, num; 132 133 ip = VTOI(vp); 134 mp = vp->v_mount; 135 ump = ip->i_ump; 136 fs = ip->i_lfs; 137 #ifdef DIAGNOSTIC 138 if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL)) 139 panic("ulfs_bmaparray: invalid arguments"); 140 #endif 141 142 if (runp) { 143 /* 144 * XXX 145 * If MAXBSIZE is the largest transfer the disks can handle, 146 * we probably want maxrun to be 1 block less so that we 147 * don't create a block larger than the device can handle. 148 */ 149 *runp = 0; 150 maxrun = MAXPHYS / mp->mnt_stat.f_iosize - 1; 151 } 152 153 if (bn >= 0 && bn < ULFS_NDADDR) { 154 if (nump != NULL) 155 *nump = 0; 156 if (ump->um_fstype == ULFS1) 157 daddr = ulfs_rw32(ip->i_ffs1_db[bn], 158 ULFS_MPNEEDSWAP(fs)); 159 else 160 daddr = ulfs_rw64(ip->i_ffs2_db[bn], 161 ULFS_MPNEEDSWAP(fs)); 162 *bnp = blkptrtodb(fs, daddr); 163 /* 164 * Since this is FFS independent code, we are out of 165 * scope for the definitions of BLK_NOCOPY and 166 * BLK_SNAP, but we do know that they will fall in 167 * the range 1..um_seqinc, so we use that test and 168 * return a request for a zeroed out buffer if attempts 169 * are made to read a BLK_NOCOPY or BLK_SNAP block. 170 */ 171 if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT 172 && daddr > 0 && 173 daddr < fs->um_seqinc) { 174 *bnp = -1; 175 } else if (*bnp == 0) { 176 if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) 177 == SF_SNAPSHOT) { 178 *bnp = blkptrtodb(fs, bn * fs->um_seqinc); 179 } else { 180 *bnp = -1; 181 } 182 } else if (runp) { 183 if (ump->um_fstype == ULFS1) { 184 for (++bn; bn < ULFS_NDADDR && *runp < maxrun && 185 is_sequential(fs, 186 ulfs_rw32(ip->i_ffs1_db[bn - 1], 187 ULFS_MPNEEDSWAP(fs)), 188 ulfs_rw32(ip->i_ffs1_db[bn], 189 ULFS_MPNEEDSWAP(fs))); 190 ++bn, ++*runp); 191 } else { 192 for (++bn; bn < ULFS_NDADDR && *runp < maxrun && 193 is_sequential(fs, 194 ulfs_rw64(ip->i_ffs2_db[bn - 1], 195 ULFS_MPNEEDSWAP(fs)), 196 ulfs_rw64(ip->i_ffs2_db[bn], 197 ULFS_MPNEEDSWAP(fs))); 198 ++bn, ++*runp); 199 } 200 } 201 return (0); 202 } 203 204 xap = ap == NULL ? a : ap; 205 if (!nump) 206 nump = # 207 if ((error = ulfs_getlbns(vp, bn, xap, nump)) != 0) 208 return (error); 209 210 num = *nump; 211 212 /* Get disk address out of indirect block array */ 213 if (ump->um_fstype == ULFS1) 214 daddr = ulfs_rw32(ip->i_ffs1_ib[xap->in_off], 215 ULFS_MPNEEDSWAP(fs)); 216 else 217 daddr = ulfs_rw64(ip->i_ffs2_ib[xap->in_off], 218 ULFS_MPNEEDSWAP(fs)); 219 220 for (bp = NULL, ++xap; --num; ++xap) { 221 /* 222 * Exit the loop if there is no disk address assigned yet and 223 * the indirect block isn't in the cache, or if we were 224 * looking for an indirect block and we've found it. 225 */ 226 227 metalbn = xap->in_lbn; 228 if (metalbn == bn) 229 break; 230 if (daddr == 0) { 231 mutex_enter(&bufcache_lock); 232 cbp = incore(vp, metalbn); 233 mutex_exit(&bufcache_lock); 234 if (cbp == NULL) 235 break; 236 } 237 238 /* 239 * If we get here, we've either got the block in the cache 240 * or we have a disk address for it, go fetch it. 241 */ 242 if (bp) 243 brelse(bp, 0); 244 245 xap->in_exists = 1; 246 bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); 247 if (bp == NULL) { 248 249 /* 250 * getblk() above returns NULL only iff we are 251 * pagedaemon. See the implementation of getblk 252 * for detail. 253 */ 254 255 return (ENOMEM); 256 } 257 if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { 258 trace(TR_BREADHIT, pack(vp, size), metalbn); 259 } 260 #ifdef DIAGNOSTIC 261 else if (!daddr) 262 panic("ulfs_bmaparray: indirect block not in cache"); 263 #endif 264 else { 265 trace(TR_BREADMISS, pack(vp, size), metalbn); 266 bp->b_blkno = blkptrtodb(fs, daddr); 267 bp->b_flags |= B_READ; 268 BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); 269 VOP_STRATEGY(vp, bp); 270 curlwp->l_ru.ru_inblock++; /* XXX */ 271 if ((error = biowait(bp)) != 0) { 272 brelse(bp, 0); 273 return (error); 274 } 275 } 276 if (ump->um_fstype == ULFS1) { 277 daddr = ulfs_rw32(((u_int32_t *)bp->b_data)[xap->in_off], 278 ULFS_MPNEEDSWAP(fs)); 279 if (num == 1 && daddr && runp) { 280 for (bn = xap->in_off + 1; 281 bn < MNINDIR(fs) && *runp < maxrun && 282 is_sequential(fs, 283 ulfs_rw32(((int32_t *)bp->b_data)[bn-1], 284 ULFS_MPNEEDSWAP(fs)), 285 ulfs_rw32(((int32_t *)bp->b_data)[bn], 286 ULFS_MPNEEDSWAP(fs))); 287 ++bn, ++*runp); 288 } 289 } else { 290 daddr = ulfs_rw64(((u_int64_t *)bp->b_data)[xap->in_off], 291 ULFS_MPNEEDSWAP(fs)); 292 if (num == 1 && daddr && runp) { 293 for (bn = xap->in_off + 1; 294 bn < MNINDIR(fs) && *runp < maxrun && 295 is_sequential(fs, 296 ulfs_rw64(((int64_t *)bp->b_data)[bn-1], 297 ULFS_MPNEEDSWAP(fs)), 298 ulfs_rw64(((int64_t *)bp->b_data)[bn], 299 ULFS_MPNEEDSWAP(fs))); 300 ++bn, ++*runp); 301 } 302 } 303 } 304 if (bp) 305 brelse(bp, 0); 306 307 /* 308 * Since this is FFS independent code, we are out of scope for the 309 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they 310 * will fall in the range 1..um_seqinc, so we use that test and 311 * return a request for a zeroed out buffer if attempts are made 312 * to read a BLK_NOCOPY or BLK_SNAP block. 313 */ 314 if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT 315 && daddr > 0 && daddr < fs->um_seqinc) { 316 *bnp = -1; 317 return (0); 318 } 319 *bnp = blkptrtodb(fs, daddr); 320 if (*bnp == 0) { 321 if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) 322 == SF_SNAPSHOT) { 323 *bnp = blkptrtodb(fs, bn * fs->um_seqinc); 324 } else { 325 *bnp = -1; 326 } 327 } 328 return (0); 329 } 330 331 /* 332 * Create an array of logical block number/offset pairs which represent the 333 * path of indirect blocks required to access a data block. The first "pair" 334 * contains the logical block number of the appropriate single, double or 335 * triple indirect block and the offset into the inode indirect block array. 336 * Note, the logical block number of the inode single/double/triple indirect 337 * block appears twice in the array, once with the offset into the i_ffs1_ib and 338 * once with the offset into the page itself. 339 */ 340 int 341 ulfs_getlbns(struct vnode *vp, daddr_t bn, struct indir *ap, int *nump) 342 { 343 daddr_t metalbn, realbn; 344 struct ulfsmount *ump; 345 struct lfs *fs; 346 int64_t blockcnt; 347 int lbc; 348 int i, numlevels, off; 349 350 ump = VFSTOULFS(vp->v_mount); 351 fs = ump->um_lfs; 352 if (nump) 353 *nump = 0; 354 numlevels = 0; 355 realbn = bn; 356 if (bn < 0) 357 bn = -bn; 358 KASSERT(bn >= ULFS_NDADDR); 359 360 /* 361 * Determine the number of levels of indirection. After this loop 362 * is done, blockcnt indicates the number of data blocks possible 363 * at the given level of indirection, and ULFS_NIADDR - i is the number 364 * of levels of indirection needed to locate the requested block. 365 */ 366 367 bn -= ULFS_NDADDR; 368 for (lbc = 0, i = ULFS_NIADDR;; i--, bn -= blockcnt) { 369 if (i == 0) 370 return (EFBIG); 371 372 lbc += fs->um_lognindir; 373 blockcnt = (int64_t)1 << lbc; 374 375 if (bn < blockcnt) 376 break; 377 } 378 379 /* Calculate the address of the first meta-block. */ 380 metalbn = -((realbn >= 0 ? realbn : -realbn) - bn + ULFS_NIADDR - i); 381 382 /* 383 * At each iteration, off is the offset into the bap array which is 384 * an array of disk addresses at the current level of indirection. 385 * The logical block number and the offset in that block are stored 386 * into the argument array. 387 */ 388 ap->in_lbn = metalbn; 389 ap->in_off = off = ULFS_NIADDR - i; 390 ap->in_exists = 0; 391 ap++; 392 for (++numlevels; i <= ULFS_NIADDR; i++) { 393 /* If searching for a meta-data block, quit when found. */ 394 if (metalbn == realbn) 395 break; 396 397 lbc -= fs->um_lognindir; 398 off = (bn >> lbc) & (MNINDIR(fs) - 1); 399 400 ++numlevels; 401 ap->in_lbn = metalbn; 402 ap->in_off = off; 403 ap->in_exists = 0; 404 ++ap; 405 406 metalbn -= -1 + ((int64_t)off << lbc); 407 } 408 if (nump) 409 *nump = numlevels; 410 return (0); 411 } 412