1 /* $NetBSD: kern_physio.c,v 1.33 1998/08/04 04:03:13 perry Exp $ */ 2 3 /*- 4 * Copyright (c) 1994 Christopher G. Demetriou 5 * Copyright (c) 1982, 1986, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * @(#)kern_physio.c 8.1 (Berkeley) 6/10/93 42 */ 43 44 #include "opt_uvm.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/buf.h> 49 #include <sys/conf.h> 50 #include <sys/malloc.h> 51 #include <sys/proc.h> 52 53 #include <vm/vm.h> 54 55 #if defined(UVM) 56 #include <uvm/uvm_extern.h> 57 #endif 58 59 /* 60 * The routines implemented in this file are described in: 61 * Leffler, et al.: The Design and Implementation of the 4.3BSD 62 * UNIX Operating System (Addison-Wesley, 1989) 63 * on pages 231-233. 64 * 65 * The routines "getphysbuf" and "putphysbuf" steal and return a swap 66 * buffer. Leffler, et al., says that swap buffers are used to do the 67 * I/O, so raw I/O requests don't have to be single-threaded. 68 */ 69 70 struct buf *getphysbuf __P((void)); 71 void putphysbuf __P((struct buf *bp)); 72 73 /* 74 * Do "physical I/O" on behalf of a user. "Physical I/O" is I/O directly 75 * from the raw device to user buffers, and bypasses the buffer cache. 76 * 77 * Comments in brackets are from Leffler, et al.'s pseudo-code implementation. 
78 */ 79 int 80 physio(strategy, bp, dev, flags, minphys, uio) 81 void (*strategy) __P((struct buf *)); 82 struct buf *bp; 83 dev_t dev; 84 int flags; 85 void (*minphys) __P((struct buf *)); 86 struct uio *uio; 87 { 88 struct iovec *iovp; 89 struct proc *p = curproc; 90 int error, done, i, nobuf, s, todo; 91 92 error = 0; 93 flags &= B_READ | B_WRITE; 94 95 /* 96 * [check user read/write access to the data buffer] 97 * 98 * Check each iov one by one. Note that we know if we're reading or 99 * writing, so we ignore the uio's rw parameter. Also note that if 100 * we're doing a read, that's a *write* to user-space. 101 */ 102 if (uio->uio_segflg == UIO_USERSPACE) 103 for (i = 0; i < uio->uio_iovcnt; i++) 104 #if defined(UVM) /* XXXCDC: map not locked, rethink */ 105 if (!uvm_useracc(uio->uio_iov[i].iov_base, 106 uio->uio_iov[i].iov_len, 107 (flags == B_READ) ? B_WRITE : B_READ)) 108 return (EFAULT); 109 #else 110 if (!useracc(uio->uio_iov[i].iov_base, 111 uio->uio_iov[i].iov_len, 112 (flags == B_READ) ? B_WRITE : B_READ)) 113 return (EFAULT); 114 #endif 115 116 /* Make sure we have a buffer, creating one if necessary. */ 117 if ((nobuf = (bp == NULL)) != 0) { 118 119 bp = getphysbuf(); 120 /* bp was just malloc'd so can't already be busy */ 121 bp->b_flags |= B_BUSY; 122 123 } else { 124 125 /* [raise the processor priority level to splbio;] */ 126 s = splbio(); 127 128 /* [while the buffer is marked busy] */ 129 while (bp->b_flags & B_BUSY) { 130 /* [mark the buffer wanted] */ 131 bp->b_flags |= B_WANTED; 132 /* [wait until the buffer is available] */ 133 tsleep((caddr_t)bp, PRIBIO+1, "physbuf", 0); 134 } 135 136 /* Mark it busy, so nobody else will use it. 
*/ 137 bp->b_flags |= B_BUSY; 138 139 /* [lower the priority level] */ 140 splx(s); 141 142 } 143 144 /* [set up the fixed part of the buffer for a transfer] */ 145 bp->b_dev = dev; 146 bp->b_error = 0; 147 bp->b_proc = p; 148 149 /* 150 * [while there are data to transfer and no I/O error] 151 * Note that I/O errors are handled with a 'goto' at the bottom 152 * of the 'while' loop. 153 */ 154 for (i = 0; i < uio->uio_iovcnt; i++) { 155 iovp = &uio->uio_iov[i]; 156 while (iovp->iov_len > 0) { 157 /* 158 * [mark the buffer busy for physical I/O] 159 * (i.e. set B_PHYS (because it's an I/O to user 160 * memory, and B_RAW, because B_RAW is to be 161 * "Set by physio for raw transfers.", in addition 162 * to the "busy" and read/write flag.) 163 */ 164 bp->b_flags = B_BUSY | B_PHYS | B_RAW | flags; 165 166 /* [set up the buffer for a maximum-sized transfer] */ 167 bp->b_blkno = btodb(uio->uio_offset); 168 bp->b_bcount = iovp->iov_len; 169 bp->b_data = iovp->iov_base; 170 171 /* 172 * [call minphys to bound the tranfer size] 173 * and remember the amount of data to transfer, 174 * for later comparison. 175 */ 176 (*minphys)(bp); 177 todo = bp->b_bcount; 178 #ifdef DIAGNOSTIC 179 if (todo < 0) 180 panic("todo < 0; minphys broken"); 181 if (todo > MAXPHYS) 182 panic("todo > MAXPHYS; minphys broken"); 183 #endif 184 185 /* 186 * [lock the part of the user address space involved 187 * in the transfer] 188 * Beware vmapbuf(); it clobbers b_data and 189 * saves it in b_saveaddr. However, vunmapbuf() 190 * restores it. 191 */ 192 PHOLD(p); 193 #if defined(UVM) 194 uvm_vslock(p, bp->b_data, todo); 195 #else 196 vslock(p, bp->b_data, todo); 197 #endif 198 vmapbuf(bp, todo); 199 200 /* [call strategy to start the transfer] */ 201 (*strategy)(bp); 202 203 /* 204 * Note that the raise/wait/lower/get error 205 * steps below would be done by biowait(), but 206 * we want to unlock the address space before 207 * we lower the priority. 
208 * 209 * [raise the priority level to splbio] 210 */ 211 s = splbio(); 212 213 /* [wait for the transfer to complete] */ 214 while ((bp->b_flags & B_DONE) == 0) 215 tsleep((caddr_t) bp, PRIBIO + 1, "physio", 0); 216 217 /* Mark it busy again, so nobody else will use it. */ 218 bp->b_flags |= B_BUSY; 219 220 /* [lower the priority level] */ 221 splx(s); 222 223 /* 224 * [unlock the part of the address space previously 225 * locked] 226 */ 227 vunmapbuf(bp, todo); 228 #if defined(UVM) 229 uvm_vsunlock(p, bp->b_data, todo); 230 #else 231 vsunlock(p, bp->b_data, todo); 232 #endif 233 PRELE(p); 234 235 /* remember error value (save a splbio/splx pair) */ 236 if (bp->b_flags & B_ERROR) 237 error = (bp->b_error ? bp->b_error : EIO); 238 239 /* 240 * [deduct the transfer size from the total number 241 * of data to transfer] 242 */ 243 done = bp->b_bcount - bp->b_resid; 244 #ifdef DIAGNOSTIC 245 if (done < 0) 246 panic("done < 0; strategy broken"); 247 if (done > todo) 248 panic("done > todo; strategy broken"); 249 #endif 250 iovp->iov_len -= done; 251 (caddr_t)iovp->iov_base += done; 252 uio->uio_offset += done; 253 uio->uio_resid -= done; 254 255 /* 256 * Now, check for an error. 257 * Also, handle weird end-of-disk semantics. 258 */ 259 if (error || done < todo) 260 goto done; 261 } 262 } 263 264 done: 265 /* 266 * [clean up the state of the buffer] 267 * Remember if somebody wants it, so we can wake them up below. 268 * Also, if we had to steal it, give it back. 269 */ 270 s = splbio(); 271 bp->b_flags &= ~(B_BUSY | B_PHYS | B_RAW); 272 if (nobuf) 273 putphysbuf(bp); 274 else { 275 /* 276 * [if another process is waiting for the raw I/O buffer, 277 * wake up processes waiting to do physical I/O; 278 */ 279 if (bp->b_flags & B_WANTED) { 280 bp->b_flags &= ~B_WANTED; 281 wakeup(bp); 282 } 283 } 284 splx(s); 285 286 return (error); 287 } 288 289 /* 290 * allocate a buffer structure for use in physical I/O. 
291 */ 292 struct buf * 293 getphysbuf() 294 { 295 struct buf *bp; 296 297 bp = malloc(sizeof(*bp), M_TEMP, M_WAITOK); 298 memset(bp, 0, sizeof(*bp)); 299 300 /* XXXCDC: are the following two lines necessary? */ 301 bp->b_rcred = bp->b_wcred = NOCRED; 302 bp->b_vnbufs.le_next = NOLIST; 303 304 return(bp); 305 } 306 307 /* 308 * get rid of a swap buffer structure which has been used in physical I/O. 309 */ 310 void 311 putphysbuf(bp) 312 struct buf *bp; 313 { 314 315 /* XXXCDC: is this necesary? */ 316 if (bp->b_vp) 317 brelvp(bp); 318 319 if (bp->b_flags & B_WANTED) 320 panic("putphysbuf: private buf B_WANTED"); 321 free(bp, M_TEMP); 322 323 } 324 325 /* 326 * Leffler, et al., says on p. 231: 327 * "The minphys() routine is called by physio() to adjust the 328 * size of each I/O transfer before the latter is passed to 329 * the strategy routine..." 330 * 331 * so, just adjust the buffer's count accounting to MAXPHYS here, 332 * and return the new count; 333 */ 334 void 335 minphys(bp) 336 struct buf *bp; 337 { 338 339 if (bp->b_bcount > MAXPHYS) 340 bp->b_bcount = MAXPHYS; 341 } 342