1 /* 2 * Copyright (c) 1982, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 39 * $FreeBSD: src/sys/kern/kern_subr.c,v 1.31.2.2 2002/04/21 08:09:37 bde Exp $ 40 * $DragonFly: src/sys/kern/kern_subr.c,v 1.27 2007/01/29 20:44:02 tgen Exp $ 41 */ 42 43 #include "opt_ddb.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/kernel.h> 48 #include <sys/proc.h> 49 #include <sys/malloc.h> 50 #include <sys/lock.h> 51 #include <sys/resourcevar.h> 52 #include <sys/sysctl.h> 53 #include <sys/uio.h> 54 #include <sys/vnode.h> 55 #include <sys/thread2.h> 56 #include <machine/limits.h> 57 58 #include <cpu/lwbuf.h> 59 60 #include <vm/vm.h> 61 #include <vm/vm_page.h> 62 #include <vm/vm_map.h> 63 64 SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, 65 "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); 66 67 /* 68 * UIO_READ: copy the kernelspace cp to the user or kernelspace UIO 69 * UIO_WRITE: copy the user or kernelspace UIO to the kernelspace cp 70 * 71 * For userspace UIO's, uio_td must be the current thread. 72 * 73 * The syscall interface is responsible for limiting the length to 74 * ssize_t for things like read() or write() which return the bytes 75 * read or written as ssize_t. These functions work with unsigned 76 * lengths. 77 */ 78 int 79 uiomove(caddr_t cp, size_t n, struct uio *uio) 80 { 81 struct iovec *iov; 82 size_t cnt; 83 int error = 0; 84 int save = 0; 85 int baseticks = ticks; 86 87 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 88 ("uiomove: mode")); 89 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 90 ("uiomove proc")); 91 92 if (curproc) { 93 save = curproc->p_flag & P_DEADLKTREAT; 94 curproc->p_flag |= P_DEADLKTREAT; 95 } 96 97 while (n > 0 && uio->uio_resid) { 98 iov = uio->uio_iov; 99 cnt = iov->iov_len; 100 if (cnt == 0) { 101 uio->uio_iov++; 102 uio->uio_iovcnt--; 103 continue; 104 } 105 if (cnt > n) 106 cnt = n; 107 108 switch (uio->uio_segflg) { 109 110 case UIO_USERSPACE: 111 if (ticks - baseticks >= hogticks) { 112 uio_yield(); 113 baseticks = ticks; 114 } 115 if (uio->uio_rw == UIO_READ) 116 error = copyout(cp, iov->iov_base, cnt); 117 else 118 error = copyin(iov->iov_base, cp, cnt); 119 if (error) 120 break; 121 break; 122 123 case UIO_SYSSPACE: 124 if (uio->uio_rw == UIO_READ) 125 bcopy((caddr_t)cp, iov->iov_base, cnt); 126 else 127 bcopy(iov->iov_base, (caddr_t)cp, cnt); 128 break; 129 case UIO_NOCOPY: 130 break; 131 } 132 iov->iov_base = (char *)iov->iov_base + cnt; 133 iov->iov_len -= cnt; 134 uio->uio_resid -= cnt; 135 uio->uio_offset += cnt; 136 cp += cnt; 137 n -= cnt; 138 } 139 if (curproc) 140 curproc->p_flag = (curproc->p_flag & ~P_DEADLKTREAT) | save; 141 return (error); 142 } 143 144 /* 145 * Like uiomove() but copies zero-fill. Only allowed for UIO_READ, 146 * for obvious reasons. 147 */ 148 int 149 uiomovez(size_t n, struct uio *uio) 150 { 151 struct iovec *iov; 152 size_t cnt; 153 int error = 0; 154 155 KASSERT(uio->uio_rw == UIO_READ, ("uiomovez: mode")); 156 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 157 ("uiomove proc")); 158 159 while (n > 0 && uio->uio_resid) { 160 iov = uio->uio_iov; 161 cnt = iov->iov_len; 162 if (cnt == 0) { 163 uio->uio_iov++; 164 uio->uio_iovcnt--; 165 continue; 166 } 167 if (cnt > n) 168 cnt = n; 169 170 switch (uio->uio_segflg) { 171 case UIO_USERSPACE: 172 error = copyout(ZeroPage, iov->iov_base, cnt); 173 if (error) 174 break; 175 break; 176 case UIO_SYSSPACE: 177 bzero(iov->iov_base, cnt); 178 break; 179 case UIO_NOCOPY: 180 break; 181 } 182 iov->iov_base = (char *)iov->iov_base + cnt; 183 iov->iov_len -= cnt; 184 uio->uio_resid -= cnt; 185 uio->uio_offset += cnt; 186 n -= cnt; 187 } 188 return (error); 189 } 190 191 /* 192 * Wrapper for uiomove() that validates the arguments against a known-good 193 * kernel buffer. 194 */ 195 int 196 uiomove_frombuf(void *buf, size_t buflen, struct uio *uio) 197 { 198 size_t offset; 199 200 offset = (size_t)uio->uio_offset; 201 if ((off_t)offset != uio->uio_offset) 202 return (EINVAL); 203 if (buflen == 0 || offset >= buflen) 204 return (0); 205 return (uiomove((char *)buf + offset, buflen - offset, uio)); 206 } 207 208 /* 209 * Give next character to user as result of read. 210 */ 211 int 212 ureadc(int c, struct uio *uio) 213 { 214 struct iovec *iov; 215 char *iov_base; 216 217 again: 218 if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) 219 panic("ureadc"); 220 iov = uio->uio_iov; 221 if (iov->iov_len == 0) { 222 uio->uio_iovcnt--; 223 uio->uio_iov++; 224 goto again; 225 } 226 switch (uio->uio_segflg) { 227 228 case UIO_USERSPACE: 229 if (subyte(iov->iov_base, c) < 0) 230 return (EFAULT); 231 break; 232 233 case UIO_SYSSPACE: 234 iov_base = iov->iov_base; 235 *iov_base = c; 236 iov->iov_base = iov_base; 237 break; 238 239 case UIO_NOCOPY: 240 break; 241 } 242 iov->iov_base = (char *)iov->iov_base + 1; 243 iov->iov_len--; 244 uio->uio_resid--; 245 uio->uio_offset++; 246 return (0); 247 } 248 249 /* 250 * General routine to allocate a hash table. Make the hash table size a 251 * power of 2 greater or equal to the number of elements requested, and 252 * store the masking value in *hashmask. 253 */ 254 void * 255 hashinit(int elements, struct malloc_type *type, u_long *hashmask) 256 { 257 long hashsize; 258 LIST_HEAD(generic, generic) *hashtbl; 259 int i; 260 261 if (elements <= 0) 262 panic("hashinit: bad elements"); 263 for (hashsize = 2; hashsize < elements; hashsize <<= 1) 264 continue; 265 hashtbl = kmalloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); 266 for (i = 0; i < hashsize; i++) 267 LIST_INIT(&hashtbl[i]); 268 *hashmask = hashsize - 1; 269 return (hashtbl); 270 } 271 272 /* 273 * This is a newer version which allocates a hash table of structures. 274 * 275 * The returned array will be zero'd. The caller is responsible for 276 * initializing the structures. 277 */ 278 void * 279 hashinit_ext(int elements, size_t size, struct malloc_type *type, 280 u_long *hashmask) 281 { 282 long hashsize; 283 void *hashtbl; 284 285 if (elements <= 0) 286 panic("hashinit: bad elements"); 287 for (hashsize = 2; hashsize < elements; hashsize <<= 1) 288 continue; 289 hashtbl = kmalloc((size_t)hashsize * size, type, M_WAITOK | M_ZERO); 290 *hashmask = hashsize - 1; 291 return (hashtbl); 292 } 293 294 static int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039, 295 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 296 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; 297 #define NPRIMES (sizeof(primes) / sizeof(primes[0])) 298 299 /* 300 * General routine to allocate a prime number sized hash table. 301 */ 302 void * 303 phashinit(int elements, struct malloc_type *type, u_long *nentries) 304 { 305 long hashsize; 306 LIST_HEAD(generic, generic) *hashtbl; 307 int i; 308 309 if (elements <= 0) 310 panic("phashinit: bad elements"); 311 for (i = 1, hashsize = primes[1]; hashsize <= elements;) { 312 i++; 313 if (i == NPRIMES) 314 break; 315 hashsize = primes[i]; 316 } 317 hashsize = primes[i - 1]; 318 hashtbl = kmalloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); 319 for (i = 0; i < hashsize; i++) 320 LIST_INIT(&hashtbl[i]); 321 *nentries = hashsize; 322 return (hashtbl); 323 } 324 325 /* 326 * This is a newer version which allocates a hash table of structures 327 * in a prime-number size. 328 * 329 * The returned array will be zero'd. The caller is responsible for 330 * initializing the structures. 331 */ 332 void * 333 phashinit_ext(int elements, size_t size, struct malloc_type *type, 334 u_long *nentries) 335 { 336 long hashsize; 337 void *hashtbl; 338 int i; 339 340 if (elements <= 0) 341 panic("phashinit: bad elements"); 342 for (i = 1, hashsize = primes[1]; hashsize <= elements;) { 343 i++; 344 if (i == NPRIMES) 345 break; 346 hashsize = primes[i]; 347 } 348 hashsize = primes[i - 1]; 349 hashtbl = kmalloc((size_t)hashsize * size, type, M_WAITOK | M_ZERO); 350 *nentries = hashsize; 351 return (hashtbl); 352 } 353 354 /* 355 * Copyin an iovec. If the iovec array fits, use the preallocated small 356 * iovec structure. If it is too big, dynamically allocate an iovec array 357 * of sufficient size. 358 * 359 * MPSAFE 360 */ 361 int 362 iovec_copyin(struct iovec *uiov, struct iovec **kiov, struct iovec *siov, 363 size_t iov_cnt, size_t *iov_len) 364 { 365 struct iovec *iovp; 366 int error, i; 367 size_t len; 368 369 if (iov_cnt > UIO_MAXIOV) 370 return EMSGSIZE; 371 if (iov_cnt > UIO_SMALLIOV) { 372 MALLOC(*kiov, struct iovec *, sizeof(struct iovec) * iov_cnt, 373 M_IOV, M_WAITOK); 374 } else { 375 *kiov = siov; 376 } 377 error = copyin(uiov, *kiov, iov_cnt * sizeof(struct iovec)); 378 if (error == 0) { 379 *iov_len = 0; 380 for (i = 0, iovp = *kiov; i < iov_cnt; i++, iovp++) { 381 /* 382 * Check for both *iov_len overflows and out of 383 * range iovp->iov_len's. We limit to the 384 * capabilities of signed integers. 385 * 386 * GCC4 - overflow check opt requires assign/test. 387 */ 388 len = *iov_len + iovp->iov_len; 389 if (len < *iov_len) 390 error = EINVAL; 391 *iov_len = len; 392 } 393 } 394 395 /* 396 * From userland disallow iovec's which exceed the sized size 397 * limit as the system calls return ssize_t. 398 * 399 * NOTE: Internal kernel interfaces can handle the unsigned 400 * limit. 401 */ 402 if (error == 0 && (ssize_t)*iov_len < 0) 403 error = EINVAL; 404 405 if (error) 406 iovec_free(kiov, siov); 407 return (error); 408 } 409 410 411 /* 412 * Copyright (c) 2004 Alan L. Cox <alc@cs.rice.edu> 413 * Copyright (c) 1982, 1986, 1991, 1993 414 * The Regents of the University of California. All rights reserved. 415 * (c) UNIX System Laboratories, Inc. 416 * All or some portions of this file are derived from material licensed 417 * to the University of California by American Telephone and Telegraph 418 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 419 * the permission of UNIX System Laboratories, Inc. 420 * 421 * Redistribution and use in source and binary forms, with or without 422 * modification, are permitted provided that the following conditions 423 * are met: 424 * 1. Redistributions of source code must retain the above copyright 425 * notice, this list of conditions and the following disclaimer. 426 * 2. Redistributions in binary form must reproduce the above copyright 427 * notice, this list of conditions and the following disclaimer in the 428 * documentation and/or other materials provided with the distribution. 429 * 4. Neither the name of the University nor the names of its contributors 430 * may be used to endorse or promote products derived from this software 431 * without specific prior written permission. 432 * 433 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 434 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 435 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 436 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 437 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 438 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 439 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 440 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 441 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 442 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 443 * SUCH DAMAGE. 444 * 445 * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 446 * $FreeBSD: src/sys/i386/i386/uio_machdep.c,v 1.1 2004/03/21 20:28:36 alc Exp $ 447 */ 448 449 /* 450 * Implement uiomove(9) from physical memory using lwbuf's to reduce 451 * the creation and destruction of ephemeral mappings. 452 */ 453 int 454 uiomove_fromphys(vm_page_t *ma, vm_offset_t offset, size_t n, struct uio *uio) 455 { 456 struct lwbuf *lwb; 457 struct thread *td = curthread; 458 struct iovec *iov; 459 void *cp; 460 vm_offset_t page_offset; 461 vm_page_t m; 462 size_t cnt; 463 int error = 0; 464 int save = 0; 465 466 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 467 ("uiomove_fromphys: mode")); 468 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, 469 ("uiomove_fromphys proc")); 470 471 crit_enter(); 472 save = td->td_flags & TDF_DEADLKTREAT; 473 td->td_flags |= TDF_DEADLKTREAT; 474 crit_exit(); 475 476 while (n > 0 && uio->uio_resid) { 477 iov = uio->uio_iov; 478 cnt = iov->iov_len; 479 if (cnt == 0) { 480 uio->uio_iov++; 481 uio->uio_iovcnt--; 482 continue; 483 } 484 if (cnt > n) 485 cnt = n; 486 page_offset = offset & PAGE_MASK; 487 cnt = min(cnt, PAGE_SIZE - page_offset); 488 m = ma[offset >> PAGE_SHIFT]; 489 lwb = lwbuf_alloc(m); 490 cp = (char *)lwbuf_kva(lwb) + page_offset; 491 switch (uio->uio_segflg) { 492 case UIO_USERSPACE: 493 /* 494 * note: removed uioyield (it was the wrong place to 495 * put it). 496 */ 497 if (uio->uio_rw == UIO_READ) 498 error = copyout(cp, iov->iov_base, cnt); 499 else 500 error = copyin(iov->iov_base, cp, cnt); 501 if (error) { 502 lwbuf_free(lwb); 503 goto out; 504 } 505 break; 506 case UIO_SYSSPACE: 507 if (uio->uio_rw == UIO_READ) 508 bcopy(cp, iov->iov_base, cnt); 509 else 510 bcopy(iov->iov_base, cp, cnt); 511 break; 512 case UIO_NOCOPY: 513 break; 514 } 515 lwbuf_free(lwb); 516 iov->iov_base = (char *)iov->iov_base + cnt; 517 iov->iov_len -= cnt; 518 uio->uio_resid -= cnt; 519 uio->uio_offset += cnt; 520 offset += cnt; 521 n -= cnt; 522 } 523 out: 524 if (save == 0) { 525 crit_enter(); 526 td->td_flags &= ~TDF_DEADLKTREAT; 527 crit_exit(); 528 } 529 return (error); 530 } 531 532