1 /* $NetBSD: kern_physio.c,v 1.20 1994/06/29 06:32:34 cgd Exp $ */ 2 3 /*- 4 * Copyright (c) 1994 Christopher G. Demetriou 5 * Copyright (c) 1982, 1986, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * @(#)kern_physio.c 8.1 (Berkeley) 6/10/93 42 */ 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/buf.h> 47 #include <sys/conf.h> 48 #include <sys/proc.h> 49 50 /* 51 * The routines implemented in this file are described in: 52 * Leffler, et al.: The Design and Implementation of the 4.3BSD 53 * UNIX Operating System (Addison Welley, 1989) 54 * on pages 231-233. 55 * 56 * The routines "getphysbuf" and "putphysbuf" steal and return a swap 57 * buffer. Leffler, et al., says that swap buffers are used to do the 58 * I/O, so raw I/O requests don't have to be single-threaded. 59 */ 60 61 struct buf *getphysbuf __P((void)); 62 void putphysbuf __P((struct buf *bp)); 63 64 /* 65 * Do "physical I/O" on behalf of a user. "Physical I/O" is I/O directly 66 * from the raw device to user buffers, and bypasses the buffer cache. 67 * 68 * Comments in brackets are from Leffler, et al.'s pseudo-code implementation. 69 */ 70 int 71 physio(strategy, bp, dev, flags, minphys, uio) 72 int (*strategy)(); 73 struct buf *bp; 74 dev_t dev; 75 int flags; 76 u_int (*minphys)(); 77 struct uio *uio; 78 { 79 struct iovec *iovp; 80 struct proc *p = curproc; 81 int error, done, i, nobuf, s, todo; 82 83 error = 0; 84 flags &= B_READ | B_WRITE; 85 86 /* 87 * [check user read/write access to the data buffer] 88 * 89 * Check each iov one by one. Note that we know if we're reading or 90 * writing, so we ignore the uio's rw parameter. Also note that if 91 * we're doing a read, that's a *write* to user-space. 92 */ 93 for (i = 0; i < uio->uio_iovcnt; i++) 94 if (!useracc(uio->uio_iov[i].iov_base, uio->uio_iov[i].iov_len, 95 (flags == B_READ) ? B_WRITE : B_READ)) 96 return (EFAULT); 97 98 /* Make sure we have a buffer, creating one if necessary. */ 99 if (nobuf = (bp == NULL)) 100 bp = getphysbuf(); 101 102 /* [raise the processor priority level to splbio;] */ 103 s = splbio(); 104 105 /* [while the buffer is marked busy] */ 106 while (bp->b_flags & B_BUSY) { 107 /* [mark the buffer wanted] */ 108 bp->b_flags |= B_WANTED; 109 /* [wait until the buffer is available] */ 110 tsleep((caddr_t)bp, PRIBIO+1, "physbuf", 0); 111 } 112 113 /* Mark it busy, so nobody else will use it. */ 114 bp->b_flags |= B_BUSY; 115 116 /* [lower the priority level] */ 117 splx(s); 118 119 /* [set up the fixed part of the buffer for a transfer] */ 120 bp->b_dev = dev; 121 bp->b_error = 0; 122 bp->b_proc = p; 123 124 /* 125 * [while there are data to transfer and no I/O error] 126 * Note that I/O errors are handled with a 'goto' at the bottom 127 * of the 'while' loop. 128 */ 129 for (i = 0; i < uio->uio_iovcnt; i++) { 130 iovp = &uio->uio_iov[i]; 131 while (iovp->iov_len > 0) { 132 /* 133 * [mark the buffer busy for physical I/O] 134 * (i.e. set B_PHYS (because it's an I/O to user 135 * memory, and B_RAW, because B_RAW is to be 136 * "Set by physio for raw transfers.", in addition 137 * to the "busy" and read/write flag.) 138 */ 139 s = splbio(); 140 bp->b_flags = B_BUSY | B_PHYS | B_RAW | flags; 141 splx(s); 142 143 /* [set up the buffer for a maximum-sized transfer] */ 144 bp->b_blkno = btodb(uio->uio_offset); 145 bp->b_bcount = iovp->iov_len; 146 bp->b_data = iovp->iov_base; 147 148 /* 149 * [call minphys to bound the tranfer size] 150 * and remember the amount of data to transfer, 151 * for later comparison. 152 */ 153 (*minphys)(bp); 154 todo = bp->b_bcount; 155 156 /* 157 * [lock the part of the user address space involved 158 * in the transfer] 159 * Beware vmapbuf(); it clobbers b_data and 160 * saves it in b_saveaddr. However, vunmapbuf() 161 * restores it. 162 */ 163 p->p_holdcnt++; 164 vslock(bp->b_data, todo); 165 vmapbuf(bp, todo); 166 167 /* [call strategy to start the transfer] */ 168 (*strategy)(bp); 169 170 /* 171 * Note that the raise/wait/lower/get error 172 * steps below would be done by biowait(), but 173 * we want to unlock the address space before 174 * we lower the priority. 175 * 176 * [raise the priority level to splbio] 177 */ 178 s = splbio(); 179 180 /* [wait for the transfer to complete] */ 181 while ((bp->b_flags & B_DONE) == 0) 182 tsleep((caddr_t) bp, PRIBIO + 1, "physio", 0); 183 184 /* 185 * [unlock the part of the address space previously 186 * locked] 187 */ 188 vunmapbuf(bp, todo); 189 vsunlock(bp->b_data, todo); 190 p->p_holdcnt--; 191 192 /* remember error value (save a splbio/splx pair) */ 193 if (bp->b_flags & B_ERROR) 194 error = (bp->b_error ? bp->b_error : EIO); 195 196 /* [lower the priority level] */ 197 splx(s); 198 199 /* 200 * [deduct the transfer size from the total number 201 * of data to transfer] 202 */ 203 done = bp->b_bcount - bp->b_resid; 204 iovp->iov_len -= done; 205 iovp->iov_base += done; 206 uio->uio_offset += done; 207 uio->uio_resid -= done; 208 209 /* 210 * Now, check for an error. 211 * Also, handle weird end-of-disk semantics. 212 */ 213 if (error || done < todo) 214 goto done; 215 } 216 } 217 218 done: 219 /* 220 * [clean up the state of the buffer] 221 * Remember if somebody wants it, so we can wake them up below. 222 * Also, if we had to steal it, give it back. 223 */ 224 s = splbio(); 225 bp->b_flags &= ~(B_BUSY | B_PHYS | B_RAW); 226 if (nobuf) 227 putphysbuf(bp); 228 else { 229 /* 230 * [if another process is waiting for the raw I/O buffer, 231 * wake up processes waiting to do physical I/O; 232 */ 233 if (bp->b_flags & B_WANTED) { 234 bp->b_flags &= ~B_WANTED; 235 wakeup(bp); 236 } 237 } 238 splx(s); 239 240 return (error); 241 } 242 243 /* 244 * Get a swap buffer structure, for use in physical I/O. 245 * Mostly taken from /sys/vm/swap_pager.c, except that it no longer 246 * records buffer list-empty conditions, and sleeps at PRIBIO + 1, 247 * rather than PSWP + 1 (and on a different wchan). 248 */ 249 struct buf * 250 getphysbuf() 251 { 252 struct buf *bp; 253 int s; 254 255 s = splbio(); 256 while (bswlist.b_actf == NULL) { 257 bswlist.b_flags |= B_WANTED; 258 tsleep((caddr_t)&bswlist, PRIBIO + 1, "getphys", 0); 259 } 260 bp = bswlist.b_actf; 261 bswlist.b_actf = bp->b_actf; 262 splx(s); 263 return (bp); 264 } 265 266 /* 267 * Get rid of a swap buffer structure which has been used in physical I/O. 268 * Mostly taken from /sys/vm/swap_pager.c, except that it now uses 269 * wakeup() rather than the VM-internal thread_wakeup(), and that the caller 270 * must mask disk interrupts, rather than putphysbuf() itself. 271 */ 272 void 273 putphysbuf(bp) 274 struct buf *bp; 275 { 276 277 bp->b_actf = bswlist.b_actf; 278 bswlist.b_actf = bp; 279 if (bp->b_vp) 280 brelvp(bp); 281 if (bswlist.b_flags & B_WANTED) { 282 bswlist.b_flags &= ~B_WANTED; 283 wakeup(&bswlist); 284 } 285 } 286 287 /* 288 * Leffler, et al., says on p. 231: 289 * "The minphys() routine is called by physio() to adjust the 290 * size of each I/O transfer before the latter is passed to 291 * the strategy routine..." 292 * 293 * so, just adjust the buffer's count accounting to MAXPHYS here, 294 * and return the new count; 295 */ 296 u_int 297 minphys(bp) 298 struct buf *bp; 299 { 300 301 bp->b_bcount = min(MAXPHYS, bp->b_bcount); 302 return bp->b_bcount; 303 } 304 305 /* 306 * Do a read on a device for a user process. 307 */ 308 rawread(dev, uio) 309 dev_t dev; 310 struct uio *uio; 311 { 312 return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL, 313 dev, B_READ, minphys, uio)); 314 } 315 316 /* 317 * Do a write on a device for a user process. 318 */ 319 rawwrite(dev, uio) 320 dev_t dev; 321 struct uio *uio; 322 { 323 return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL, 324 dev, B_WRITE, minphys, uio)); 325 } 326