1 /* $NetBSD: kttcp.c,v 1.31 2013/08/29 17:49:21 rmind Exp $ */ 2 3 /* 4 * Copyright (c) 2002 Wasabi Systems, Inc. 5 * All rights reserved. 6 * 7 * Written by Frank van der Linden and Jason R. Thorpe for 8 * Wasabi Systems, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed for the NetBSD Project by 21 * Wasabi Systems, Inc. 22 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 23 * or promote products derived from this software without specific prior 24 * written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * kttcp.c -- provides kernel support for testing network testing, 41 * see kttcp(4) 42 */ 43 44 #include <sys/cdefs.h> 45 __KERNEL_RCSID(0, "$NetBSD: kttcp.c,v 1.31 2013/08/29 17:49:21 rmind Exp $"); 46 47 #include <sys/param.h> 48 #include <sys/types.h> 49 #include <sys/ioctl.h> 50 #include <sys/file.h> 51 #include <sys/filedesc.h> 52 #include <sys/conf.h> 53 #include <sys/systm.h> 54 #include <sys/protosw.h> 55 #include <sys/proc.h> 56 #include <sys/resourcevar.h> 57 #include <sys/signal.h> 58 #include <sys/socketvar.h> 59 #include <sys/socket.h> 60 #include <sys/mbuf.h> 61 #include <sys/mount.h> 62 #include <sys/syscallargs.h> 63 64 #include <dev/kttcpio.h> 65 66 static int kttcp_send(struct lwp *l, struct kttcp_io_args *); 67 static int kttcp_recv(struct lwp *l, struct kttcp_io_args *); 68 static int kttcp_sosend(struct socket *, unsigned long long, 69 unsigned long long *, struct lwp *, int); 70 static int kttcp_soreceive(struct socket *, unsigned long long, 71 unsigned long long *, struct lwp *, int *); 72 73 void kttcpattach(int); 74 75 dev_type_ioctl(kttcpioctl); 76 77 const struct cdevsw kttcp_cdevsw = { 78 nullopen, nullclose, noread, nowrite, kttcpioctl, 79 nostop, notty, nopoll, nommap, nokqfilter, D_OTHER 80 }; 81 82 void 83 kttcpattach(int count) 84 { 85 /* Do nothing. */ 86 } 87 88 int 89 kttcpioctl(dev_t dev, u_long cmd, void *data, int flag, 90 struct lwp *l) 91 { 92 int error; 93 94 if ((flag & FWRITE) == 0) 95 return EPERM; 96 97 switch (cmd) { 98 case KTTCP_IO_SEND: 99 error = kttcp_send(l, (struct kttcp_io_args *) data); 100 break; 101 102 case KTTCP_IO_RECV: 103 error = kttcp_recv(l, (struct kttcp_io_args *) data); 104 break; 105 106 default: 107 return EINVAL; 108 } 109 110 return error; 111 } 112 113 static int 114 kttcp_send(struct lwp *l, struct kttcp_io_args *kio) 115 { 116 struct socket *so; 117 int error; 118 struct timeval t0, t1; 119 unsigned long long len, done; 120 121 if (kio->kio_totalsize >= KTTCP_MAX_XMIT) 122 return EINVAL; 123 124 if ((error = fd_getsock(kio->kio_socket, &so)) != 0) 125 return error; 126 127 len = kio->kio_totalsize; 128 microtime(&t0); 129 do { 130 error = kttcp_sosend(so, len, &done, l, 0); 131 len -= done; 132 } while (error == 0 && len > 0); 133 134 fd_putfile(kio->kio_socket); 135 136 microtime(&t1); 137 if (error != 0) 138 return error; 139 timersub(&t1, &t0, &kio->kio_elapsed); 140 141 kio->kio_bytesdone = kio->kio_totalsize - len; 142 143 return 0; 144 } 145 146 static int 147 kttcp_recv(struct lwp *l, struct kttcp_io_args *kio) 148 { 149 struct socket *so; 150 int error; 151 struct timeval t0, t1; 152 unsigned long long len, done; 153 154 done = 0; /* XXX gcc */ 155 156 if (kio->kio_totalsize > KTTCP_MAX_XMIT) 157 return EINVAL; 158 159 if ((error = fd_getsock(kio->kio_socket, &so)) != 0) 160 return error; 161 len = kio->kio_totalsize; 162 microtime(&t0); 163 do { 164 error = kttcp_soreceive(so, len, &done, l, NULL); 165 len -= done; 166 } while (error == 0 && len > 0 && done > 0); 167 168 fd_putfile(kio->kio_socket); 169 170 microtime(&t1); 171 if (error == EPIPE) 172 error = 0; 173 if (error != 0) 174 return error; 175 timersub(&t1, &t0, &kio->kio_elapsed); 176 177 kio->kio_bytesdone = kio->kio_totalsize - len; 178 179 return 0; 180 } 181 182 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 183 184 /* 185 * Slightly changed version of sosend() 186 */ 187 static int 188 kttcp_sosend(struct socket *so, unsigned long long slen, 189 unsigned long long *done, struct lwp *l, int flags) 190 { 191 struct mbuf **mp, *m, *top; 192 long space, len, mlen; 193 int error, dontroute, atomic; 194 long long resid; 195 196 atomic = sosendallatonce(so); 197 resid = slen; 198 top = NULL; 199 /* 200 * In theory resid should be unsigned. 201 * However, space must be signed, as it might be less than 0 202 * if we over-committed, and we must use a signed comparison 203 * of space and resid. On the other hand, a negative resid 204 * causes us to loop sending 0-length segments to the protocol. 205 */ 206 if (resid < 0) { 207 error = EINVAL; 208 goto out; 209 } 210 dontroute = 211 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 212 (so->so_proto->pr_flags & PR_ATOMIC); 213 l->l_ru.ru_msgsnd++; 214 #define snderr(errno) { error = errno; goto release; } 215 solock(so); 216 restart: 217 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) 218 goto out; 219 do { 220 if (so->so_state & SS_CANTSENDMORE) 221 snderr(EPIPE); 222 if (so->so_error) { 223 error = so->so_error; 224 so->so_error = 0; 225 goto release; 226 } 227 if ((so->so_state & SS_ISCONNECTED) == 0) { 228 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 229 snderr(ENOTCONN); 230 } else { 231 snderr(EDESTADDRREQ); 232 } 233 } 234 space = sbspace(&so->so_snd); 235 if (flags & MSG_OOB) 236 space += 1024; 237 if ((atomic && resid > so->so_snd.sb_hiwat)) 238 snderr(EMSGSIZE); 239 if (space < resid && (atomic || space < so->so_snd.sb_lowat)) { 240 if (so->so_state & SS_NBIO) 241 snderr(EWOULDBLOCK); 242 SBLASTRECORDCHK(&so->so_rcv, 243 "kttcp_soreceive sbwait 1"); 244 SBLASTMBUFCHK(&so->so_rcv, 245 "kttcp_soreceive sbwait 1"); 246 sbunlock(&so->so_snd); 247 error = sbwait(&so->so_snd); 248 if (error) 249 goto out; 250 goto restart; 251 } 252 mp = ⊤ 253 do { 254 sounlock(so); 255 do { 256 if (top == 0) { 257 m = m_gethdr(M_WAIT, MT_DATA); 258 mlen = MHLEN; 259 m->m_pkthdr.len = 0; 260 m->m_pkthdr.rcvif = NULL; 261 } else { 262 m = m_get(M_WAIT, MT_DATA); 263 mlen = MLEN; 264 } 265 if (resid >= MINCLSIZE && space >= MCLBYTES) { 266 m_clget(m, M_WAIT); 267 if ((m->m_flags & M_EXT) == 0) 268 goto nopages; 269 mlen = MCLBYTES; 270 #ifdef MAPPED_MBUFS 271 len = lmin(MCLBYTES, resid); 272 #else 273 if (atomic && top == 0) { 274 len = lmin(MCLBYTES - max_hdr, 275 resid); 276 m->m_data += max_hdr; 277 } else 278 len = lmin(MCLBYTES, resid); 279 #endif 280 space -= len; 281 } else { 282 nopages: 283 len = lmin(lmin(mlen, resid), space); 284 space -= len; 285 /* 286 * For datagram protocols, leave room 287 * for protocol headers in first mbuf. 288 */ 289 if (atomic && top == 0 && len < mlen) 290 MH_ALIGN(m, len); 291 } 292 resid -= len; 293 m->m_len = len; 294 *mp = m; 295 top->m_pkthdr.len += len; 296 if (error) 297 goto release; 298 mp = &m->m_next; 299 if (resid <= 0) { 300 if (flags & MSG_EOR) 301 top->m_flags |= M_EOR; 302 break; 303 } 304 } while (space > 0 && atomic); 305 solock(so); 306 307 if (so->so_state & SS_CANTSENDMORE) 308 snderr(EPIPE); 309 if (dontroute) 310 so->so_options |= SO_DONTROUTE; 311 if (resid > 0) 312 so->so_state |= SS_MORETOCOME; 313 error = (*so->so_proto->pr_usrreq)(so, 314 (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND, 315 top, NULL, NULL, l); 316 if (dontroute) 317 so->so_options &= ~SO_DONTROUTE; 318 if (resid > 0) 319 so->so_state &= ~SS_MORETOCOME; 320 top = 0; 321 mp = ⊤ 322 if (error) 323 goto release; 324 } while (resid && space > 0); 325 } while (resid); 326 327 release: 328 sbunlock(&so->so_snd); 329 out: 330 sounlock(so); 331 if (top) 332 m_freem(top); 333 *done = slen - resid; 334 #if 0 335 printf("sosend: error %d slen %llu resid %lld\n", error, slen, resid); 336 #endif 337 return (error); 338 } 339 340 static int 341 kttcp_soreceive(struct socket *so, unsigned long long slen, 342 unsigned long long *done, struct lwp *l, int *flagsp) 343 { 344 struct mbuf *m, **mp; 345 int flags, len, error, offset, moff, type; 346 long long orig_resid, resid; 347 const struct protosw *pr; 348 struct mbuf *nextrecord; 349 350 pr = so->so_proto; 351 mp = NULL; 352 type = 0; 353 resid = orig_resid = slen; 354 if (flagsp) 355 flags = *flagsp &~ MSG_EOR; 356 else 357 flags = 0; 358 if (flags & MSG_OOB) { 359 m = m_get(M_WAIT, MT_DATA); 360 solock(so); 361 error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, 362 (struct mbuf *)(long)(flags & MSG_PEEK), NULL, NULL); 363 sounlock(so); 364 if (error) 365 goto bad; 366 do { 367 resid -= min(resid, m->m_len); 368 m = m_free(m); 369 } while (resid && error == 0 && m); 370 bad: 371 if (m) 372 m_freem(m); 373 return (error); 374 } 375 if (mp) 376 *mp = NULL; 377 solock(so); 378 restart: 379 if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) 380 return (error); 381 m = so->so_rcv.sb_mb; 382 /* 383 * If we have less data than requested, block awaiting more 384 * (subject to any timeout) if: 385 * 1. the current count is less than the low water mark, 386 * 2. MSG_WAITALL is set, and it is possible to do the entire 387 * receive operation at once if we block (resid <= hiwat), or 388 * 3. MSG_DONTWAIT is not set. 389 * If MSG_WAITALL is set but resid is larger than the receive buffer, 390 * we have to do the receive in sections, and thus risk returning 391 * a short count if a timeout or signal occurs after we start. 392 */ 393 if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && 394 so->so_rcv.sb_cc < resid) && 395 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 396 ((flags & MSG_WAITALL) && resid <= so->so_rcv.sb_hiwat)) && 397 m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { 398 #ifdef DIAGNOSTIC 399 if (m == NULL && so->so_rcv.sb_cc) 400 panic("receive 1"); 401 #endif 402 if (so->so_error) { 403 if (m) 404 goto dontblock; 405 error = so->so_error; 406 if ((flags & MSG_PEEK) == 0) 407 so->so_error = 0; 408 goto release; 409 } 410 if (so->so_state & SS_CANTRCVMORE) { 411 if (m) 412 goto dontblock; 413 else 414 goto release; 415 } 416 for (; m; m = m->m_next) 417 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 418 m = so->so_rcv.sb_mb; 419 goto dontblock; 420 } 421 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 422 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 423 error = ENOTCONN; 424 goto release; 425 } 426 if (resid == 0) 427 goto release; 428 if ((so->so_state & SS_NBIO) || 429 (flags & (MSG_DONTWAIT|MSG_NBIO))) { 430 error = EWOULDBLOCK; 431 goto release; 432 } 433 sbunlock(&so->so_rcv); 434 error = sbwait(&so->so_rcv); 435 if (error) { 436 sounlock(so); 437 return (error); 438 } 439 goto restart; 440 } 441 dontblock: 442 /* 443 * On entry here, m points to the first record of the socket buffer. 444 * While we process the initial mbufs containing address and control 445 * info, we save a copy of m->m_nextpkt into nextrecord. 446 */ 447 #ifdef notyet /* XXXX */ 448 if (uio->uio_lwp) 449 uio->uio_lwp->l_ru.ru_msgrcv++; 450 #endif 451 KASSERT(m == so->so_rcv.sb_mb); 452 SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 1"); 453 SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 1"); 454 nextrecord = m->m_nextpkt; 455 if (pr->pr_flags & PR_ADDR) { 456 #ifdef DIAGNOSTIC 457 if (m->m_type != MT_SONAME) 458 panic("receive 1a"); 459 #endif 460 orig_resid = 0; 461 if (flags & MSG_PEEK) { 462 m = m->m_next; 463 } else { 464 sbfree(&so->so_rcv, m); 465 MFREE(m, so->so_rcv.sb_mb); 466 m = so->so_rcv.sb_mb; 467 } 468 } 469 while (m && m->m_type == MT_CONTROL && error == 0) { 470 if (flags & MSG_PEEK) { 471 m = m->m_next; 472 } else { 473 sbfree(&so->so_rcv, m); 474 MFREE(m, so->so_rcv.sb_mb); 475 m = so->so_rcv.sb_mb; 476 } 477 } 478 479 /* 480 * If m is non-NULL, we have some data to read. From now on, 481 * make sure to keep sb_lastrecord consistent when working on 482 * the last packet on the chain (nextrecord == NULL) and we 483 * change m->m_nextpkt. 484 */ 485 if (m) { 486 if ((flags & MSG_PEEK) == 0) { 487 m->m_nextpkt = nextrecord; 488 /* 489 * If nextrecord == NULL (this is a single chain), 490 * then sb_lastrecord may not be valid here if m 491 * was changed earlier. 492 */ 493 if (nextrecord == NULL) { 494 KASSERT(so->so_rcv.sb_mb == m); 495 so->so_rcv.sb_lastrecord = m; 496 } 497 } 498 type = m->m_type; 499 if (type == MT_OOBDATA) 500 flags |= MSG_OOB; 501 } else { 502 if ((flags & MSG_PEEK) == 0) { 503 KASSERT(so->so_rcv.sb_mb == m); 504 so->so_rcv.sb_mb = nextrecord; 505 SB_EMPTY_FIXUP(&so->so_rcv); 506 } 507 } 508 SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 2"); 509 SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 2"); 510 511 moff = 0; 512 offset = 0; 513 while (m && resid > 0 && error == 0) { 514 if (m->m_type == MT_OOBDATA) { 515 if (type != MT_OOBDATA) 516 break; 517 } else if (type == MT_OOBDATA) 518 break; 519 #ifdef DIAGNOSTIC 520 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) 521 panic("receive 3"); 522 #endif 523 so->so_state &= ~SS_RCVATMARK; 524 len = resid; 525 if (so->so_oobmark && len > so->so_oobmark - offset) 526 len = so->so_oobmark - offset; 527 if (len > m->m_len - moff) 528 len = m->m_len - moff; 529 /* 530 * If mp is set, just pass back the mbufs. 531 * Otherwise copy them out via the uio, then free. 532 * Sockbuf must be consistent here (points to current mbuf, 533 * it points to next record) when we drop priority; 534 * we must note any additions to the sockbuf when we 535 * block interrupts again. 536 */ 537 resid -= len; 538 if (len == m->m_len - moff) { 539 if (m->m_flags & M_EOR) 540 flags |= MSG_EOR; 541 if (flags & MSG_PEEK) { 542 m = m->m_next; 543 moff = 0; 544 } else { 545 nextrecord = m->m_nextpkt; 546 sbfree(&so->so_rcv, m); 547 if (mp) { 548 *mp = m; 549 mp = &m->m_next; 550 so->so_rcv.sb_mb = m = m->m_next; 551 *mp = NULL; 552 } else { 553 MFREE(m, so->so_rcv.sb_mb); 554 m = so->so_rcv.sb_mb; 555 } 556 /* 557 * If m != NULL, we also know that 558 * so->so_rcv.sb_mb != NULL. 559 */ 560 KASSERT(so->so_rcv.sb_mb == m); 561 if (m) { 562 m->m_nextpkt = nextrecord; 563 if (nextrecord == NULL) 564 so->so_rcv.sb_lastrecord = m; 565 } else { 566 so->so_rcv.sb_mb = nextrecord; 567 SB_EMPTY_FIXUP(&so->so_rcv); 568 } 569 SBLASTRECORDCHK(&so->so_rcv, 570 "kttcp_soreceive 3"); 571 SBLASTMBUFCHK(&so->so_rcv, 572 "kttcp_soreceive 3"); 573 } 574 } else { 575 if (flags & MSG_PEEK) 576 moff += len; 577 else { 578 if (mp) { 579 sounlock(so); 580 *mp = m_copym(m, 0, len, M_WAIT); 581 solock(so); 582 } 583 m->m_data += len; 584 m->m_len -= len; 585 so->so_rcv.sb_cc -= len; 586 } 587 } 588 if (so->so_oobmark) { 589 if ((flags & MSG_PEEK) == 0) { 590 so->so_oobmark -= len; 591 if (so->so_oobmark == 0) { 592 so->so_state |= SS_RCVATMARK; 593 break; 594 } 595 } else { 596 offset += len; 597 if (offset == so->so_oobmark) 598 break; 599 } 600 } 601 if (flags & MSG_EOR) 602 break; 603 /* 604 * If the MSG_WAITALL flag is set (for non-atomic socket), 605 * we must not quit until "uio->uio_resid == 0" or an error 606 * termination. If a signal/timeout occurs, return 607 * with a short count but without error. 608 * Keep sockbuf locked against other readers. 609 */ 610 while (flags & MSG_WAITALL && m == NULL && resid > 0 && 611 !sosendallatonce(so) && !nextrecord) { 612 if (so->so_error || so->so_state & SS_CANTRCVMORE) 613 break; 614 /* 615 * If we are peeking and the socket receive buffer is 616 * full, stop since we can't get more data to peek at. 617 */ 618 if ((flags & MSG_PEEK) && sbspace(&so->so_rcv) <= 0) 619 break; 620 /* 621 * If we've drained the socket buffer, tell the 622 * protocol in case it needs to do something to 623 * get it filled again. 624 */ 625 if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb) 626 (*pr->pr_usrreq)(so, PRU_RCVD, NULL, 627 (struct mbuf *)(long)flags, NULL, NULL); 628 SBLASTRECORDCHK(&so->so_rcv, 629 "kttcp_soreceive sbwait 2"); 630 SBLASTMBUFCHK(&so->so_rcv, 631 "kttcp_soreceive sbwait 2"); 632 error = sbwait(&so->so_rcv); 633 if (error) { 634 sbunlock(&so->so_rcv); 635 sounlock(so); 636 return (0); 637 } 638 if ((m = so->so_rcv.sb_mb) != NULL) 639 nextrecord = m->m_nextpkt; 640 } 641 } 642 643 if (m && pr->pr_flags & PR_ATOMIC) { 644 flags |= MSG_TRUNC; 645 if ((flags & MSG_PEEK) == 0) 646 (void) sbdroprecord(&so->so_rcv); 647 } 648 if ((flags & MSG_PEEK) == 0) { 649 if (m == NULL) { 650 /* 651 * First part is an SB_EMPTY_FIXUP(). Second part 652 * makes sure sb_lastrecord is up-to-date if 653 * there is still data in the socket buffer. 654 */ 655 so->so_rcv.sb_mb = nextrecord; 656 if (so->so_rcv.sb_mb == NULL) { 657 so->so_rcv.sb_mbtail = NULL; 658 so->so_rcv.sb_lastrecord = NULL; 659 } else if (nextrecord->m_nextpkt == NULL) 660 so->so_rcv.sb_lastrecord = nextrecord; 661 } 662 SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 4"); 663 SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 4"); 664 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 665 (*pr->pr_usrreq)(so, PRU_RCVD, NULL, 666 (struct mbuf *)(long)flags, NULL, NULL); 667 } 668 if (orig_resid == resid && orig_resid && 669 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 670 sbunlock(&so->so_rcv); 671 goto restart; 672 } 673 674 if (flagsp) 675 *flagsp |= flags; 676 release: 677 sbunlock(&so->so_rcv); 678 sounlock(so); 679 *done = slen - resid; 680 #if 0 681 printf("soreceive: error %d slen %llu resid %lld\n", error, slen, resid); 682 #endif 683 return (error); 684 } 685