/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)kern_subr.c	8.3 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/kern_subr.c,v 1.31.2.2 2002/04/21 08:09:37 bde Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/resourcevar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#include <sys/thread2.h>
#include <machine/limits.h>

#include <cpu/lwbuf.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>

SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV,
	"Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");
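
/*
 * For reference, the read-only limit exported above is the value a
 * userland program sees via sysconf(3).  Illustrative sketch only, not
 * part of this file:
 *
 *	#include <unistd.h>
 *
 *	long iov_max = sysconf(_SC_IOV_MAX);	(* reports UIO_MAXIOV *)
 */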

/*
 * UIO_READ:	copy the kernelspace cp to the user or kernelspace UIO
 * UIO_WRITE:	copy the user or kernelspace UIO to the kernelspace cp
 *
 * For userspace UIO's, uio_td must be the current thread.
 *
 * The syscall interface is responsible for limiting the length to
 * ssize_t for things like read() or write() which return the bytes
 * read or written as ssize_t.  These functions work with unsigned
 * lengths.
 */
int
uiomove(caddr_t cp, size_t n, struct uio *uio)
{
	thread_t td = curthread;
	struct iovec *iov;
	size_t cnt;
	int error = 0;
	int save = 0;

	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
		("uiomove: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td,
		("uiomove proc"));

	crit_enter();
	save = td->td_flags & TDF_DEADLKTREAT;
	td->td_flags |= TDF_DEADLKTREAT;
	crit_exit();

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;

		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			lwkt_user_yield();
			if (uio->uio_rw == UIO_READ)
				error = copyout(cp, iov->iov_base, cnt);
			else
				error = copyin(iov->iov_base, cp, cnt);
			if (error)
				goto done;	/* stop; do not advance the uio */
			break;
		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				bcopy(cp, iov->iov_base, cnt);
			else
				bcopy(iov->iov_base, cp, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}
		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		cp += cnt;
		n -= cnt;
	}
done:
	crit_enter();
	td->td_flags = (td->td_flags & ~TDF_DEADLKTREAT) | save;
	crit_exit();
	return (error);
}

/*
 * This is the same as uiomove() except (cp, n) is within the bounds of
 * the passed, locked buffer.  Under certain circumstances a VM fault
 * occurring with a locked buffer held can result in a deadlock or an
 * attempt to recursively lock the buffer.
 *
 * This procedure deals with these cases.
 *
 * If the buffer represents a regular file, is B_CACHE, but the last VM page
 * is not fully valid, we fix up the last VM page.  This should handle the
 * recursive lock issue.
 *
 * Deadlocks are another issue.  We are holding the vp and the bp locked
 * and could deadlock against a different vp and/or bp if another thread
 * is trying to access us while we are accessing it.  The only solution
 * here is to release the bp and vnode lock and do the uio to/from a
 * system buffer, then regain the locks and copyback (if applicable).
 * XXX TODO.
 */
int
uiomovebp(struct buf *bp, caddr_t cp, size_t n, struct uio *uio)
{
	int count;
	vm_page_t m;

	if (bp->b_vp && bp->b_vp->v_type == VREG &&
	    (bp->b_flags & B_CACHE) &&
	    (count = bp->b_xio.xio_npages) != 0 &&
	    (m = bp->b_xio.xio_pages[count-1])->valid != VM_PAGE_BITS_ALL) {
		vm_page_zero_invalid(m, TRUE);
	}
	return (uiomove(cp, n, uio));
}
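
/*
 * Typical use of uiomove(): copy kernel data out in chunks and let the
 * routine advance uio_offset and uio_resid.  Illustrative sketch only;
 * mydev_read() and mydev_fill() are hypothetical:
 *
 *	static int
 *	mydev_read(struct uio *uio)
 *	{
 *		char buf[128];
 *		size_t len;
 *		int error = 0;
 *
 *		while (uio->uio_resid > 0 && error == 0) {
 *			len = sizeof(buf);
 *			if (len > (size_t)uio->uio_resid)
 *				len = (size_t)uio->uio_resid;
 *			mydev_fill(buf, len);
 *			error = uiomove(buf, len, uio);
 *		}
 *		return (error);
 *	}
 */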

/*
 * Like uiomove() but copies zero-fill.  Only allowed for UIO_READ,
 * for obvious reasons.
 */
int
uiomovez(size_t n, struct uio *uio)
{
	struct iovec *iov;
	size_t cnt;
	int error = 0;

	KASSERT(uio->uio_rw == UIO_READ, ("uiomovez: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
		("uiomovez proc"));

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;

		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			error = copyout(ZeroPage, iov->iov_base, cnt);
			if (error)
				return (error);	/* stop; do not advance the uio */
			break;
		case UIO_SYSSPACE:
			bzero(iov->iov_base, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}
		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		n -= cnt;
	}
	return (error);
}

/*
 * Wrapper for uiomove() that validates the arguments against a known-good
 * kernel buffer.  This function automatically indexes the buffer by
 * uio_offset and handles all range checking.
 */
int
uiomove_frombuf(void *buf, size_t buflen, struct uio *uio)
{
	size_t offset;

	offset = (size_t)uio->uio_offset;
	if ((off_t)offset != uio->uio_offset)
		return (EINVAL);
	if (buflen == 0 || offset >= buflen)
		return (0);
	return (uiomove((char *)buf + offset, buflen - offset, uio));
}

/*
 * Give the next character to the user as the result of a read.
 */
int
ureadc(int c, struct uio *uio)
{
	struct iovec *iov;

again:
	if (uio->uio_iovcnt == 0 || uio->uio_resid == 0)
		panic("ureadc");
	iov = uio->uio_iov;
	if (iov->iov_len == 0) {
		uio->uio_iovcnt--;
		uio->uio_iov++;
		goto again;
	}

	switch (uio->uio_segflg) {
	case UIO_USERSPACE:
		if (subyte(iov->iov_base, c) < 0)
			return (EFAULT);
		break;
	case UIO_SYSSPACE:
		*(char *)iov->iov_base = c;
		break;
	case UIO_NOCOPY:
		break;
	}
	iov->iov_base = (char *)iov->iov_base + 1;
	iov->iov_len--;
	uio->uio_resid--;
	uio->uio_offset++;
	return (0);
}

/*
 * General routine to allocate a hash table.  Make the hash table size a
 * power of 2 greater than or equal to the number of elements requested,
 * and store the masking value in *hashmask.
 */
void *
hashinit(int elements, struct malloc_type *type, u_long *hashmask)
{
	long hashsize;
	LIST_HEAD(generic, generic) *hashtbl;
	int i;

	if (elements <= 0)
		panic("hashinit: bad elements");
	for (hashsize = 2; hashsize < elements; hashsize <<= 1)
		continue;
	hashtbl = kmalloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK);
	for (i = 0; i < hashsize; i++)
		LIST_INIT(&hashtbl[i]);
	*hashmask = hashsize - 1;
	return (hashtbl);
}

void
hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask)
{
	LIST_HEAD(generic, generic) *hashtbl, *hp;

	hashtbl = vhashtbl;
	for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++)
		KASSERT(LIST_EMPTY(hp), ("%s: hash not empty", __func__));
	kfree(hashtbl, type);
}
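
/*
 * Usage sketch for hashinit()/hashdestroy().  "struct myobj", its
 * LIST_ENTRY field, and the hash computation are hypothetical:
 *
 *	LIST_HEAD(mybucket, myobj) *mytbl;
 *	u_long mymask;
 *
 *	mytbl = hashinit(desired, M_TEMP, &mymask);
 *	...
 *	struct mybucket *bp = &mytbl[myobj_hash(obj) & mymask];
 *	LIST_INSERT_HEAD(bp, obj, myobj_entry);
 *	...
 *	LIST_REMOVE(obj, myobj_entry);	(* empty all buckets first *)
 *	hashdestroy(mytbl, M_TEMP, mymask);
 */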

/*
 * This is a newer version of hashinit() which allocates a hash table
 * of structures.
 *
 * The returned array will be zeroed.  The caller is responsible for
 * initializing the structures.
 */
void *
hashinit_ext(int elements, size_t size, struct malloc_type *type,
	     u_long *hashmask)
{
	long hashsize;
	void *hashtbl;

	if (elements <= 0)
		panic("hashinit_ext: bad elements");
	for (hashsize = 2; hashsize < elements; hashsize <<= 1)
		continue;
	hashtbl = kmalloc((size_t)hashsize * size, type, M_WAITOK | M_ZERO);
	*hashmask = hashsize - 1;
	return (hashtbl);
}

static int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039,
			2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653,
			7159, 7673, 8191, 12281, 16381, 24571, 32749 };
#define NPRIMES NELEM(primes)

/*
 * General routine to allocate a prime-number-sized hash table.
 */
void *
phashinit(int elements, struct malloc_type *type, u_long *nentries)
{
	long hashsize;
	LIST_HEAD(generic, generic) *hashtbl;
	int i;

	if (elements <= 0)
		panic("phashinit: bad elements");
	for (i = 1, hashsize = primes[1]; hashsize <= elements;) {
		i++;
		if (i == NPRIMES)
			break;
		hashsize = primes[i];
	}
	hashsize = primes[i - 1];
	hashtbl = kmalloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK);
	for (i = 0; i < hashsize; i++)
		LIST_INIT(&hashtbl[i]);
	*nentries = hashsize;
	return (hashtbl);
}

/*
 * This is a newer version of phashinit() which allocates a hash table
 * of structures in a prime-number size.
 *
 * The returned array will be zeroed.  The caller is responsible for
 * initializing the structures.
 */
void *
phashinit_ext(int elements, size_t size, struct malloc_type *type,
	      u_long *nentries)
{
	long hashsize;
	void *hashtbl;
	int i;

	if (elements <= 0)
		panic("phashinit_ext: bad elements");
	for (i = 1, hashsize = primes[1]; hashsize <= elements;) {
		i++;
		if (i == NPRIMES)
			break;
		hashsize = primes[i];
	}
	hashsize = primes[i - 1];
	hashtbl = kmalloc((size_t)hashsize * size, type, M_WAITOK | M_ZERO);
	*nentries = hashsize;
	return (hashtbl);
}
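
/*
 * Usage sketch: a prime-sized table is indexed with a modulo rather
 * than a mask.  Names are hypothetical:
 *
 *	LIST_HEAD(mybucket, myobj) *mytbl;
 *	u_long nbuckets;
 *
 *	mytbl = phashinit(desired, M_TEMP, &nbuckets);
 *	struct mybucket *bp = &mytbl[myobj_hash(obj) % nbuckets];
 */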

/*
 * Copyin an iovec.  If the iovec array fits, use the preallocated small
 * iovec structure.  If it is too big, dynamically allocate an iovec array
 * of sufficient size.
 *
 * MPSAFE
 */
int
iovec_copyin(struct iovec *uiov, struct iovec **kiov, struct iovec *siov,
	     size_t iov_cnt, size_t *iov_len)
{
	struct iovec *iovp;
	size_t len;
	size_t i;
	int error;

	if (iov_cnt > UIO_MAXIOV)
		return (EMSGSIZE);
	if (iov_cnt > UIO_SMALLIOV) {
		*kiov = kmalloc(sizeof(struct iovec) * iov_cnt, M_IOV,
				M_WAITOK);
	} else {
		*kiov = siov;
	}
	error = copyin(uiov, *kiov, iov_cnt * sizeof(struct iovec));
	if (error == 0) {
		*iov_len = 0;
		for (i = 0, iovp = *kiov; i < iov_cnt; i++, iovp++) {
			/*
			 * Check for both *iov_len overflows and out of
			 * range iovp->iov_len's.  We limit to the
			 * capabilities of signed integers.
			 *
			 * GCC4 - overflow check opt requires assign/test.
			 */
			len = *iov_len + iovp->iov_len;
			if (len < *iov_len)
				error = EINVAL;
			*iov_len = len;
		}
	}

	/*
	 * From userland disallow iovec's which exceed the signed size
	 * limit, as the system calls return ssize_t.
	 *
	 * NOTE: Internal kernel interfaces can handle the unsigned
	 *	 limit.
	 */
	if (error == 0 && (ssize_t)*iov_len < 0)
		error = EINVAL;

	if (error)
		iovec_free(kiov, siov);
	return (error);
}
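
/*
 * Typical calling pattern (sketch; the "uap" fields are hypothetical
 * stand-ins for a readv/writev-style system call argument block):
 *
 *	struct iovec siov[UIO_SMALLIOV];
 *	struct iovec *kiov;
 *	size_t len;
 *	int error;
 *
 *	error = iovec_copyin(uap->iovp, &kiov, siov, uap->iovcnt, &len);
 *	if (error)
 *		return (error);
 *	... build a struct uio from kiov/len and perform the I/O ...
 *	iovec_free(&kiov, siov);
 */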

/*
 * Copyright (c) 2004 Alan L. Cox <alc@cs.rice.edu>
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)kern_subr.c	8.3 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/i386/i386/uio_machdep.c,v 1.1 2004/03/21 20:28:36 alc Exp $
 */

/*
 * Implement uiomove(9) from physical memory using lwbuf's to reduce
 * the creation and destruction of ephemeral mappings.
 */
int
uiomove_fromphys(vm_page_t *ma, vm_offset_t offset, size_t n, struct uio *uio)
{
	struct lwbuf lwb_cache;
	struct lwbuf *lwb;
	struct thread *td = curthread;
	struct iovec *iov;
	void *cp;
	vm_offset_t page_offset;
	vm_page_t m;
	size_t cnt;
	int error = 0;
	int save = 0;

	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
		("uiomove_fromphys: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td,
		("uiomove_fromphys proc"));

	crit_enter();
	save = td->td_flags & TDF_DEADLKTREAT;
	td->td_flags |= TDF_DEADLKTREAT;
	crit_exit();

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;
		page_offset = offset & PAGE_MASK;
		cnt = min(cnt, PAGE_SIZE - page_offset);
		m = ma[offset >> PAGE_SHIFT];
		lwb = lwbuf_alloc(m, &lwb_cache);
		cp = (char *)lwbuf_kva(lwb) + page_offset;

		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			/*
			 * note: removed uioyield (it was the wrong place to
			 * put it).
			 */
			if (uio->uio_rw == UIO_READ)
				error = copyout(cp, iov->iov_base, cnt);
			else
				error = copyin(iov->iov_base, cp, cnt);
			if (error) {
				lwbuf_free(lwb);
				goto out;
			}
			break;
		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				bcopy(cp, iov->iov_base, cnt);
			else
				bcopy(iov->iov_base, cp, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}
		lwbuf_free(lwb);
		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		offset += cnt;
		n -= cnt;
	}
out:
	if (save == 0) {
		crit_enter();
		td->td_flags &= ~TDF_DEADLKTREAT;
		crit_exit();
	}
	return (error);
}
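
/*
 * Usage sketch: given an array of vm_page_t's (held/busied by the
 * caller) backing a contiguous range, copy "bytes" bytes starting at a
 * byte offset relative to the first page.  Illustrative only:
 *
 *	error = uiomove_fromphys(pages, start & PAGE_MASK, bytes, uio);
 */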