/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_subr.c	8.3 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/kern_subr.c,v 1.31.2.2 2002/04/21 08:09:37 bde Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/resourcevar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#include <sys/thread2.h>
#include <machine/limits.h>

#include <cpu/lwbuf.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>

SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV,
	"Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");

/*
 * UIO_READ:	copy the kernelspace cp to the user or kernelspace UIO
 * UIO_WRITE:	copy the user or kernelspace UIO to the kernelspace cp
 *
 * For userspace UIO's, uio_td must be the current thread.
 *
 * The syscall interface is responsible for limiting the length to
 * ssize_t for things like read() or write() which return the bytes
 * read or written as ssize_t.  These functions work with unsigned
 * lengths.
 */
int
uiomove(caddr_t cp, size_t n, struct uio *uio)
{
	thread_t td = curthread;
	struct iovec *iov;
	size_t cnt;
	size_t tot;
	int error = 0;
	int save = 0;

	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
	    ("uiomove: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td,
	    ("uiomove proc"));

	crit_enter();
	save = td->td_flags & TDF_DEADLKTREAT;
	td->td_flags |= TDF_DEADLKTREAT;
	crit_exit();

	tot = 0;

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;
		tot += cnt;

		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			if (tot > 1024*1024)
				lwkt_user_yield();
			if (uio->uio_rw == UIO_READ)
				error = copyout(cp, iov->iov_base, cnt);
			else
				error = copyin(iov->iov_base, cp, cnt);
			break;
		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				bcopy(cp, iov->iov_base, cnt);
			else
				bcopy(iov->iov_base, cp, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}

		if (error)
			break;
		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		cp += cnt;
		n -= cnt;
	}
	crit_enter();
	td->td_flags = (td->td_flags & ~TDF_DEADLKTREAT) | save;
	crit_exit();

	return (error);
}

/*
 * This is the same as uiomove() except (cp, n) is within the bounds of
 * the passed, locked buffer.  Under certain circumstances a VM fault
 * occurring with a locked buffer held can result in a deadlock or an
 * attempt to recursively lock the buffer.
 *
 * This procedure deals with these cases.
 *
 * If the buffer represents a regular file, is B_CACHE, but the last VM page
 * is not fully valid we fix-up the last VM page.  This should handle the
 * recursive lock issue.
 *
 * Deadlocks are another issue.  We are holding the vp and the bp locked
 * and could deadlock against a different vp and/or bp if another thread is
 * trying to access us while we are accessing it.  The only solution here is
 * to release the bp and vnode lock and do the uio to/from a system buffer,
 * then regain the locks and copyback (if applicable).  XXX TODO.
 */
int
uiomovebp(struct buf *bp, caddr_t cp, size_t n, struct uio *uio)
{
	int count;
	vm_page_t m;

	if (bp->b_vp && bp->b_vp->v_type == VREG &&
	    (bp->b_flags & B_CACHE) &&
	    (count = bp->b_xio.xio_npages) != 0 &&
	    (m = bp->b_xio.xio_pages[count-1])->valid != VM_PAGE_BITS_ALL) {
		vm_page_zero_invalid(m, TRUE);
	}
	return (uiomove(cp, n, uio));
}

/*
 * uiomove() but fail for non-trivial VM faults, even if the VM fault is
 * valid.  Returns EFAULT if a VM fault occurred via the copyin/copyout
 * onfault code.
 *
 * This allows callers to hold e.g. a busy VM page, or a busy VM object,
 * or a locked vnode through the call and then fall-back to safer code
 * if we fail.
 */
int
uiomove_nofault(caddr_t cp, size_t n, struct uio *uio)
{
	thread_t td = curthread;
	int error;

	atomic_set_int(&td->td_flags, TDF_NOFAULT);
	error = uiomove(cp, n, uio);
	atomic_clear_int(&td->td_flags, TDF_NOFAULT);
	return error;
}
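
/*
 * Illustrative sketch (not part of the original file): a minimal
 * character-device read routine showing the typical uiomove() calling
 * pattern.  The "example_softc" structure, its fields, and the routine
 * name are hypothetical; only uiomove() and the uio accounting it
 * performs (advancing uio_offset and shrinking uio_resid) come from the
 * code above.  Kept under #if 0 so it does not affect the build.
 */
#if 0
struct example_softc {
	char	buf[1024];	/* data previously produced by the driver */
	size_t	len;		/* number of valid bytes in buf */
};

static int
example_read(struct example_softc *sc, struct uio *uio)
{
	size_t offset;
	size_t cnt;

	/* Index the kernel buffer by the caller's file offset. */
	if (uio->uio_offset < 0 || (size_t)uio->uio_offset > sc->len)
		return (EINVAL);
	offset = (size_t)uio->uio_offset;
	cnt = sc->len - offset;
	if (cnt > uio->uio_resid)
		cnt = uio->uio_resid;
	if (cnt == 0)
		return (0);

	/*
	 * UIO_READ: copy from the kernel buffer out to the (user or
	 * kernel) iovecs described by the uio.
	 */
	return (uiomove(sc->buf + offset, cnt, uio));
}
#endif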

/*
 * Like uiomove() but copies zero-fill.  Only allowed for UIO_READ,
 * for obvious reasons.
 */
int
uiomovez(size_t n, struct uio *uio)
{
	struct iovec *iov;
	size_t cnt;
	int error = 0;

	KASSERT(uio->uio_rw == UIO_READ, ("uiomovez: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
	    ("uiomove proc"));

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;

		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			error = copyout(ZeroPage, iov->iov_base, cnt);
			break;
		case UIO_SYSSPACE:
			bzero(iov->iov_base, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}

		if (error)
			break;
		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		n -= cnt;
	}
	return (error);
}

/*
 * Wrapper for uiomove() that validates the arguments against a known-good
 * kernel buffer.  This function automatically indexes the buffer by
 * uio_offset and handles all range checking.
 */
int
uiomove_frombuf(void *buf, size_t buflen, struct uio *uio)
{
	size_t offset;

	offset = (size_t)uio->uio_offset;
	if ((off_t)offset != uio->uio_offset)
		return (EINVAL);
	if (buflen == 0 || offset >= buflen)
		return (0);
	return (uiomove((char *)buf + offset, buflen - offset, uio));
}

/*
 * Give next character to user as result of read.
 */
int
ureadc(int c, struct uio *uio)
{
	struct iovec *iov;
	char *iov_base;

again:
	if (uio->uio_iovcnt == 0 || uio->uio_resid == 0)
		panic("ureadc");
	iov = uio->uio_iov;
	if (iov->iov_len == 0) {
		uio->uio_iovcnt--;
		uio->uio_iov++;
		goto again;
	}

	switch (uio->uio_segflg) {
	case UIO_USERSPACE:
		if (subyte(iov->iov_base, c) < 0)
			return (EFAULT);
		break;
	case UIO_SYSSPACE:
		iov_base = iov->iov_base;
		*iov_base = c;
		iov->iov_base = iov_base;
		break;
	case UIO_NOCOPY:
		break;
	}

	iov->iov_base = (char *)iov->iov_base + 1;
	iov->iov_len--;
	uio->uio_resid--;
	uio->uio_offset++;
	return (0);
}

/*
 * General routine to allocate a hash table.  Make the hash table size a
 * power of 2 greater than or equal to the number of elements requested,
 * and store the masking value in *hashmask.
 */
void *
hashinit(int elements, struct malloc_type *type, u_long *hashmask)
{
	long hashsize;
	LIST_HEAD(generic, generic) *hashtbl;
	int i;

	if (elements <= 0)
		panic("hashinit: bad elements");
	for (hashsize = 2; hashsize < elements; hashsize <<= 1)
		continue;
	hashtbl = kmalloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK);
	for (i = 0; i < hashsize; i++)
		LIST_INIT(&hashtbl[i]);
	*hashmask = hashsize - 1;
	return (hashtbl);
}

void
hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask)
{
	LIST_HEAD(generic, generic) *hashtbl, *hp;

	hashtbl = vhashtbl;
	for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++)
		KASSERT(LIST_EMPTY(hp), ("%s: hash not empty", __func__));
	kfree(hashtbl, type);
}
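
/*
 * Illustrative sketch (not part of the original file): typical use of
 * hashinit()/hashdestroy().  The malloc type, the "example_node"
 * structure, and the key handling are hypothetical; the pattern of
 * masking a hash value with the returned *hashmask to select a bucket
 * is what hashinit() is designed for, since the table size is a power
 * of 2.  Kept under #if 0 so it does not affect the build.
 */
#if 0
struct example_node {
	LIST_ENTRY(example_node) link;
	u_long	key;
};

LIST_HEAD(example_bucket, example_node);

MALLOC_DEFINE(M_EXAMPLEHASH, "examplehash", "example hash table");

static struct example_bucket *example_tbl;
static u_long example_mask;

static void
example_init(void)
{
	/* Rounds 100 up to 128 buckets; example_mask becomes 127. */
	example_tbl = hashinit(100, M_EXAMPLEHASH, &example_mask);
}

static struct example_bucket *
example_bucket_for(u_long key)
{
	/* Power-of-2 table size makes bucket selection a simple mask. */
	return (&example_tbl[key & example_mask]);
}

static void
example_uninit(void)
{
	/* All buckets must already be empty (asserted by hashdestroy). */
	hashdestroy(example_tbl, M_EXAMPLEHASH, example_mask);
}
#endif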

/*
 * This is a newer version which allocates a hash table of structures.
 *
 * The returned array will be zero'd.  The caller is responsible for
 * initializing the structures.
 */
void *
hashinit_ext(int elements, size_t size, struct malloc_type *type,
	     u_long *hashmask)
{
	long hashsize;
	void *hashtbl;

	if (elements <= 0)
		panic("hashinit: bad elements");
	for (hashsize = 2; hashsize < elements; hashsize <<= 1)
		continue;
	hashtbl = kmalloc((size_t)hashsize * size, type, M_WAITOK | M_ZERO);
	*hashmask = hashsize - 1;
	return (hashtbl);
}

static int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039,
			2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653,
			7159, 7673, 8191, 12281, 16381, 24571, 32749 };
#define	NPRIMES NELEM(primes)

/*
 * General routine to allocate a prime number sized hash table.
 */
void *
phashinit(int elements, struct malloc_type *type, u_long *nentries)
{
	long hashsize;
	LIST_HEAD(generic, generic) *hashtbl;
	int i;

	if (elements <= 0)
		panic("phashinit: bad elements");
	for (i = 1, hashsize = primes[1]; hashsize <= elements;) {
		i++;
		if (i == NPRIMES)
			break;
		hashsize = primes[i];
	}
	hashsize = primes[i - 1];
	hashtbl = kmalloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK);
	for (i = 0; i < hashsize; i++)
		LIST_INIT(&hashtbl[i]);
	*nentries = hashsize;
	return (hashtbl);
}

/*
 * This is a newer version which allocates a hash table of structures
 * in a prime-number size.
 *
 * The returned array will be zero'd.  The caller is responsible for
 * initializing the structures.
 */
void *
phashinit_ext(int elements, size_t size, struct malloc_type *type,
	      u_long *nentries)
{
	long hashsize;
	void *hashtbl;
	int i;

	if (elements <= 0)
		panic("phashinit: bad elements");
	for (i = 1, hashsize = primes[1]; hashsize <= elements;) {
		i++;
		if (i == NPRIMES)
			break;
		hashsize = primes[i];
	}
	hashsize = primes[i - 1];
	hashtbl = kmalloc((size_t)hashsize * size, type, M_WAITOK | M_ZERO);
	*nentries = hashsize;
	return (hashtbl);
}

/*
 * Copyin an iovec array.  If the array fits, use the preallocated small
 * iovec structure.  If it is too big, dynamically allocate an iovec array
 * of sufficient size.
 *
 * MPSAFE
 */
int
iovec_copyin(struct iovec *uiov, struct iovec **kiov, struct iovec *siov,
	     size_t iov_cnt, size_t *iov_len)
{
	struct iovec *iovp;
	int error, i;
	size_t len;

	if (iov_cnt > UIO_MAXIOV)
		return EMSGSIZE;
	if (iov_cnt > UIO_SMALLIOV) {
		*kiov = kmalloc(sizeof(struct iovec) * iov_cnt, M_IOV,
				M_WAITOK);
	} else {
		*kiov = siov;
	}
	error = copyin(uiov, *kiov, iov_cnt * sizeof(struct iovec));
	if (error == 0) {
		*iov_len = 0;
		for (i = 0, iovp = *kiov; i < iov_cnt; i++, iovp++) {
			/*
			 * Check for both *iov_len overflows and out of
			 * range iovp->iov_len's.  We limit to the
			 * capabilities of signed integers.
			 *
			 * GCC4 - overflow check opt requires assign/test.
			 */
			len = *iov_len + iovp->iov_len;
			if (len < *iov_len)
				error = EINVAL;
			*iov_len = len;
		}
	}

	/*
	 * From userland, disallow iovecs whose total length exceeds the
	 * ssize_t limit, since the system calls return ssize_t.
	 *
	 * NOTE: Internal kernel interfaces can handle the unsigned
	 *	 limit.
	 */
	if (error == 0 && (ssize_t)*iov_len < 0)
		error = EINVAL;

	if (error)
		iovec_free(kiov, siov);
	return (error);
}
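
/*
 * Illustrative sketch (not part of the original file): the usual
 * iovec_copyin()/iovec_free() pairing, roughly what a readv()-style
 * syscall path does before building a struct uio.  The function name is
 * hypothetical; iovec_free() itself is declared elsewhere, not in this
 * file.  Kept under #if 0 so it does not affect the build.
 */
#if 0
static int
example_readv_setup(struct iovec *uiov, size_t iovcnt, struct uio *auio,
		    struct iovec *aiov /* UIO_SMALLIOV entries */)
{
	struct iovec *kiov;
	size_t len;
	int error;

	/*
	 * Copy the user iovec array into kernel space.  Small arrays
	 * land in the caller-supplied aiov[]; larger ones are
	 * kmalloc()'d and must be released with iovec_free().
	 */
	error = iovec_copyin(uiov, &kiov, aiov, iovcnt, &len);
	if (error)
		return (error);

	auio->uio_iov = kiov;
	auio->uio_iovcnt = iovcnt;
	auio->uio_resid = len;
	auio->uio_rw = UIO_READ;
	auio->uio_segflg = UIO_USERSPACE;
	auio->uio_td = curthread;

	/* ... perform the I/O, then: iovec_free(&kiov, aiov); */
	return (0);
}
#endif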

/*
 * Copyright (c) 2004 Alan L. Cox <alc@cs.rice.edu>
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_subr.c	8.3 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/i386/i386/uio_machdep.c,v 1.1 2004/03/21 20:28:36 alc Exp $
 */

/*
 * Implement uiomove(9) from physical memory using lwbuf's to reduce
 * the creation and destruction of ephemeral mappings.
 */
int
uiomove_fromphys(vm_page_t *ma, vm_offset_t offset, size_t n, struct uio *uio)
{
	struct lwbuf lwb_cache;
	struct lwbuf *lwb;
	struct thread *td = curthread;
	struct iovec *iov;
	void *cp;
	vm_offset_t page_offset;
	vm_page_t m;
	size_t cnt;
	int error = 0;
	int save = 0;

	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
	    ("uiomove_fromphys: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
	    ("uiomove_fromphys proc"));

	crit_enter();
	save = td->td_flags & TDF_DEADLKTREAT;
	td->td_flags |= TDF_DEADLKTREAT;
	crit_exit();

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;
		page_offset = offset & PAGE_MASK;
		cnt = min(cnt, PAGE_SIZE - page_offset);
		m = ma[offset >> PAGE_SHIFT];
		lwb = lwbuf_alloc(m, &lwb_cache);
		cp = (char *)lwbuf_kva(lwb) + page_offset;

		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			/*
			 * note: removed uioyield (it was the wrong place to
			 * put it).
			 */
			if (uio->uio_rw == UIO_READ)
				error = copyout(cp, iov->iov_base, cnt);
			else
				error = copyin(iov->iov_base, cp, cnt);
			if (error) {
				lwbuf_free(lwb);
				goto out;
			}
			break;
		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				bcopy(cp, iov->iov_base, cnt);
			else
				bcopy(iov->iov_base, cp, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}
		lwbuf_free(lwb);
		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		offset += cnt;
		n -= cnt;
	}
out:
	if (save == 0) {
		crit_enter();
		td->td_flags &= ~TDF_DEADLKTREAT;
		crit_exit();
	}
	return (error);
}
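
/*
 * Illustrative sketch (not part of the original file): calling
 * uiomove_fromphys() to copy data straight out of an array of VM pages,
 * as a pager or device might do.  The function name is hypothetical and
 * the caller is assumed to have busied/wired the pages; only
 * uiomove_fromphys() and its (pages, offset-within-array, length, uio)
 * contract come from the code above.  Kept under #if 0 so it does not
 * affect the build.
 */
#if 0
static int
example_read_from_pages(vm_page_t *pages, off_t file_offset, struct uio *uio)
{
	vm_offset_t offset;
	size_t n;

	/*
	 * Offset of the request within the page array; the array is
	 * assumed to start at the page containing file_offset.
	 */
	offset = (vm_offset_t)(file_offset & PAGE_MASK);
	n = uio->uio_resid;

	/*
	 * uiomove_fromphys() maps each page with a transient lwbuf and
	 * advances the uio just like uiomove().
	 */
	return (uiomove_fromphys(pages, offset, n, uio));
}
#endif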