1 /* $NetBSD: kttcp.c,v 1.43 2024/07/05 04:31:50 rin Exp $ */ 2 3 /* 4 * Copyright (c) 2002 Wasabi Systems, Inc. 5 * All rights reserved. 6 * 7 * Written by Frank van der Linden and Jason R. Thorpe for 8 * Wasabi Systems, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed for the NetBSD Project by 21 * Wasabi Systems, Inc. 22 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 23 * or promote products derived from this software without specific prior 24 * written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * kttcp.c -- provides kernel support for testing network testing, 41 * see kttcp(4) 42 */ 43 44 #include <sys/cdefs.h> 45 __KERNEL_RCSID(0, "$NetBSD: kttcp.c,v 1.43 2024/07/05 04:31:50 rin Exp $"); 46 47 #include <sys/param.h> 48 #include <sys/types.h> 49 #include <sys/ioctl.h> 50 #include <sys/file.h> 51 #include <sys/filedesc.h> 52 #include <sys/conf.h> 53 #include <sys/systm.h> 54 #include <sys/protosw.h> 55 #include <sys/proc.h> 56 #include <sys/resourcevar.h> 57 #include <sys/signal.h> 58 #include <sys/socketvar.h> 59 #include <sys/socket.h> 60 #include <sys/mbuf.h> 61 #include <sys/mount.h> 62 #include <sys/syscallargs.h> 63 64 #include <dev/kttcpio.h> 65 66 #include "ioconf.h" 67 68 static int kttcp_send(struct lwp *l, struct kttcp_io_args *); 69 static int kttcp_recv(struct lwp *l, struct kttcp_io_args *); 70 static int kttcp_sosend(struct socket *, unsigned long long, 71 unsigned long long *, struct lwp *, int); 72 static int kttcp_soreceive(struct socket *, unsigned long long, 73 unsigned long long *, struct lwp *, int *); 74 75 dev_type_ioctl(kttcpioctl); 76 77 const struct cdevsw kttcp_cdevsw = { 78 .d_open = nullopen, 79 .d_close = nullclose, 80 .d_read = noread, 81 .d_write = nowrite, 82 .d_ioctl = kttcpioctl, 83 .d_stop = nostop, 84 .d_tty = notty, 85 .d_poll = nopoll, 86 .d_mmap = nommap, 87 .d_kqfilter = nokqfilter, 88 .d_discard = nodiscard, 89 .d_flag = D_OTHER 90 }; 91 92 void 93 kttcpattach(int count) 94 { 95 /* Do nothing. */ 96 } 97 98 int 99 kttcpioctl(dev_t dev, u_long cmd, void *data, int flag, 100 struct lwp *l) 101 { 102 int error; 103 104 if ((flag & FWRITE) == 0) 105 return EPERM; 106 107 switch (cmd) { 108 case KTTCP_IO_SEND: 109 error = kttcp_send(l, (struct kttcp_io_args *) data); 110 break; 111 112 case KTTCP_IO_RECV: 113 error = kttcp_recv(l, (struct kttcp_io_args *) data); 114 break; 115 116 default: 117 return EINVAL; 118 } 119 120 return error; 121 } 122 123 static int 124 kttcp_send(struct lwp *l, struct kttcp_io_args *kio) 125 { 126 struct socket *so; 127 int error; 128 struct timeval t0, t1; 129 unsigned long long len, done; 130 131 if (kio->kio_totalsize >= KTTCP_MAX_XMIT) 132 return EINVAL; 133 134 if ((error = fd_getsock(kio->kio_socket, &so)) != 0) 135 return error; 136 137 len = kio->kio_totalsize; 138 microtime(&t0); 139 do { 140 error = kttcp_sosend(so, len, &done, l, 0); 141 len -= done; 142 } while (error == 0 && len > 0); 143 144 fd_putfile(kio->kio_socket); 145 146 microtime(&t1); 147 if (error != 0) 148 return error; 149 timersub(&t1, &t0, &kio->kio_elapsed); 150 151 kio->kio_bytesdone = kio->kio_totalsize - len; 152 153 return 0; 154 } 155 156 static int 157 kttcp_recv(struct lwp *l, struct kttcp_io_args *kio) 158 { 159 struct socket *so; 160 int error; 161 struct timeval t0, t1; 162 unsigned long long len, done; 163 164 done = 0; /* XXX gcc */ 165 166 if (kio->kio_totalsize > KTTCP_MAX_XMIT) 167 return EINVAL; 168 169 if ((error = fd_getsock(kio->kio_socket, &so)) != 0) 170 return error; 171 len = kio->kio_totalsize; 172 microtime(&t0); 173 do { 174 error = kttcp_soreceive(so, len, &done, l, NULL); 175 len -= done; 176 } while (error == 0 && len > 0 && done > 0); 177 178 fd_putfile(kio->kio_socket); 179 180 microtime(&t1); 181 if (error == EPIPE) 182 error = 0; 183 if (error != 0) 184 return error; 185 timersub(&t1, &t0, &kio->kio_elapsed); 186 187 kio->kio_bytesdone = kio->kio_totalsize - len; 188 189 return 0; 190 } 191 192 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 193 194 /* 195 * Slightly changed version of sosend() 196 */ 197 static int 198 kttcp_sosend(struct socket *so, unsigned long long slen, 199 unsigned long long *done, struct lwp *l, int flags) 200 { 201 struct mbuf **mp, *m, *top; 202 long space, len, mlen; 203 int error, dontroute, atomic; 204 long long resid; 205 206 atomic = sosendallatonce(so); 207 resid = slen; 208 top = NULL; 209 /* 210 * In theory resid should be unsigned. 211 * However, space must be signed, as it might be less than 0 212 * if we over-committed, and we must use a signed comparison 213 * of space and resid. On the other hand, a negative resid 214 * causes us to loop sending 0-length segments to the protocol. 215 */ 216 if (resid < 0) { 217 error = EINVAL; 218 goto out; 219 } 220 dontroute = 221 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 222 (so->so_proto->pr_flags & PR_ATOMIC); 223 l->l_ru.ru_msgsnd++; 224 #define snderr(errno) { error = errno; goto release; } 225 solock(so); 226 restart: 227 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) 228 goto out; 229 do { 230 if (so->so_state & SS_CANTSENDMORE) 231 snderr(EPIPE); 232 if (so->so_error) { 233 error = so->so_error; 234 so->so_error = 0; 235 goto release; 236 } 237 if ((so->so_state & SS_ISCONNECTED) == 0) { 238 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 239 snderr(ENOTCONN); 240 } else { 241 snderr(EDESTADDRREQ); 242 } 243 } 244 space = sbspace(&so->so_snd); 245 if (flags & MSG_OOB) 246 space += 1024; 247 if ((atomic && resid > so->so_snd.sb_hiwat)) 248 snderr(EMSGSIZE); 249 if (space < resid && (atomic || space < so->so_snd.sb_lowat)) { 250 if (so->so_state & SS_NBIO) 251 snderr(EWOULDBLOCK); 252 SBLASTRECORDCHK(&so->so_rcv, 253 "kttcp_soreceive sbwait 1"); 254 SBLASTMBUFCHK(&so->so_rcv, 255 "kttcp_soreceive sbwait 1"); 256 sbunlock(&so->so_snd); 257 error = sbwait(&so->so_snd); 258 if (error) 259 goto out; 260 goto restart; 261 } 262 mp = ⊤ 263 do { 264 sounlock(so); 265 do { 266 if (top == 0) { 267 m = m_gethdr(M_WAIT, MT_DATA); 268 mlen = MHLEN; 269 m->m_pkthdr.len = 0; 270 m_reset_rcvif(m); 271 } else { 272 m = m_get(M_WAIT, MT_DATA); 273 mlen = MLEN; 274 } 275 if (resid >= MINCLSIZE && space >= MCLBYTES) { 276 m_clget(m, M_WAIT); 277 if ((m->m_flags & M_EXT) == 0) 278 goto nopages; 279 mlen = MCLBYTES; 280 #ifdef MAPPED_MBUFS 281 len = lmin(MCLBYTES, resid); 282 #else 283 if (atomic && top == 0) { 284 len = lmin(MCLBYTES - max_hdr, 285 resid); 286 m->m_data += max_hdr; 287 } else 288 len = lmin(MCLBYTES, resid); 289 #endif 290 space -= len; 291 } else { 292 nopages: 293 len = lmin(lmin(mlen, resid), space); 294 space -= len; 295 /* 296 * For datagram protocols, leave room 297 * for protocol headers in first mbuf. 298 */ 299 if (atomic && top == 0 && len < mlen) 300 m_align(m, len); 301 } 302 resid -= len; 303 m->m_len = len; 304 *mp = m; 305 top->m_pkthdr.len += len; 306 if (error) 307 goto release; 308 mp = &m->m_next; 309 if (resid <= 0) { 310 if (flags & MSG_EOR) 311 top->m_flags |= M_EOR; 312 break; 313 } 314 } while (space > 0 && atomic); 315 solock(so); 316 317 if (so->so_state & SS_CANTSENDMORE) 318 snderr(EPIPE); 319 if (dontroute) 320 so->so_options |= SO_DONTROUTE; 321 if (resid > 0) 322 so->so_state |= SS_MORETOCOME; 323 if (flags & MSG_OOB) 324 error = (*so->so_proto->pr_usrreqs->pr_sendoob)(so, 325 top, NULL); 326 else 327 error = (*so->so_proto->pr_usrreqs->pr_send)(so, 328 top, NULL, NULL, l); 329 if (dontroute) 330 so->so_options &= ~SO_DONTROUTE; 331 if (resid > 0) 332 so->so_state &= ~SS_MORETOCOME; 333 top = 0; 334 mp = ⊤ 335 if (error) 336 goto release; 337 } while (resid && space > 0); 338 } while (resid); 339 340 release: 341 sbunlock(&so->so_snd); 342 out: 343 sounlock(so); 344 m_freem(top); 345 *done = slen - resid; 346 #if 0 347 printf("sosend: error %d slen %llu resid %lld\n", error, slen, resid); 348 #endif 349 return (error); 350 } 351 352 static int 353 kttcp_soreceive(struct socket *so, unsigned long long slen, 354 unsigned long long *done, struct lwp *l, int *flagsp) 355 { 356 struct mbuf *m, **mp; 357 int flags, len, error, offset, moff, type; 358 long long orig_resid, resid; 359 const struct protosw *pr; 360 struct mbuf *nextrecord; 361 362 pr = so->so_proto; 363 mp = NULL; 364 type = 0; 365 resid = orig_resid = slen; 366 if (flagsp) 367 flags = *flagsp &~ MSG_EOR; 368 else 369 flags = 0; 370 if (flags & MSG_OOB) { 371 m = m_get(M_WAIT, MT_DATA); 372 solock(so); 373 error = (*pr->pr_usrreqs->pr_recvoob)(so, m, flags & MSG_PEEK); 374 sounlock(so); 375 if (error) 376 goto bad; 377 do { 378 resid -= uimin(resid, m->m_len); 379 m = m_free(m); 380 } while (resid && error == 0 && m); 381 bad: 382 m_freem(m); 383 return (error); 384 } 385 if (mp) 386 *mp = NULL; 387 solock(so); 388 restart: 389 if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) 390 return (error); 391 m = so->so_rcv.sb_mb; 392 /* 393 * If we have less data than requested, block awaiting more 394 * (subject to any timeout) if: 395 * 1. the current count is less than the low water mark, 396 * 2. MSG_WAITALL is set, and it is possible to do the entire 397 * receive operation at once if we block (resid <= hiwat), or 398 * 3. MSG_DONTWAIT is not set. 399 * If MSG_WAITALL is set but resid is larger than the receive buffer, 400 * we have to do the receive in sections, and thus risk returning 401 * a short count if a timeout or signal occurs after we start. 402 */ 403 if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && 404 so->so_rcv.sb_cc < resid) && 405 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 406 ((flags & MSG_WAITALL) && resid <= so->so_rcv.sb_hiwat)) && 407 m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { 408 #ifdef DIAGNOSTIC 409 if (m == NULL && so->so_rcv.sb_cc) 410 panic("receive 1"); 411 #endif 412 if (so->so_error) { 413 if (m) 414 goto dontblock; 415 error = so->so_error; 416 if ((flags & MSG_PEEK) == 0) 417 so->so_error = 0; 418 goto release; 419 } 420 if (so->so_state & SS_CANTRCVMORE) { 421 if (m) 422 goto dontblock; 423 else 424 goto release; 425 } 426 for (; m; m = m->m_next) 427 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 428 m = so->so_rcv.sb_mb; 429 goto dontblock; 430 } 431 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 432 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 433 error = ENOTCONN; 434 goto release; 435 } 436 if (resid == 0) 437 goto release; 438 if ((so->so_state & SS_NBIO) || 439 (flags & (MSG_DONTWAIT|MSG_NBIO))) { 440 error = EWOULDBLOCK; 441 goto release; 442 } 443 sbunlock(&so->so_rcv); 444 error = sbwait(&so->so_rcv); 445 if (error) { 446 sounlock(so); 447 return (error); 448 } 449 goto restart; 450 } 451 dontblock: 452 /* 453 * On entry here, m points to the first record of the socket buffer. 454 * While we process the initial mbufs containing address and control 455 * info, we save a copy of m->m_nextpkt into nextrecord. 456 */ 457 #ifdef notyet /* XXXX */ 458 if (uio->uio_lwp) 459 uio->uio_lwp->l_ru.ru_msgrcv++; 460 #endif 461 KASSERT(m == so->so_rcv.sb_mb); 462 SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 1"); 463 SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 1"); 464 nextrecord = m->m_nextpkt; 465 if (pr->pr_flags & PR_ADDR) { 466 #ifdef DIAGNOSTIC 467 if (m->m_type != MT_SONAME) 468 panic("receive 1a"); 469 #endif 470 orig_resid = 0; 471 if (flags & MSG_PEEK) { 472 m = m->m_next; 473 } else { 474 sbfree(&so->so_rcv, m); 475 m = so->so_rcv.sb_mb = m_free(m); 476 } 477 } 478 while (m && m->m_type == MT_CONTROL && error == 0) { 479 if (flags & MSG_PEEK) { 480 m = m->m_next; 481 } else { 482 sbfree(&so->so_rcv, m); 483 m = so->so_rcv.sb_mb = m_free(m); 484 } 485 } 486 487 /* 488 * If m is non-NULL, we have some data to read. From now on, 489 * make sure to keep sb_lastrecord consistent when working on 490 * the last packet on the chain (nextrecord == NULL) and we 491 * change m->m_nextpkt. 492 */ 493 if (m) { 494 if ((flags & MSG_PEEK) == 0) { 495 m->m_nextpkt = nextrecord; 496 /* 497 * If nextrecord == NULL (this is a single chain), 498 * then sb_lastrecord may not be valid here if m 499 * was changed earlier. 500 */ 501 if (nextrecord == NULL) { 502 KASSERT(so->so_rcv.sb_mb == m); 503 so->so_rcv.sb_lastrecord = m; 504 } 505 } 506 type = m->m_type; 507 if (type == MT_OOBDATA) 508 flags |= MSG_OOB; 509 } else { 510 if ((flags & MSG_PEEK) == 0) { 511 KASSERT(so->so_rcv.sb_mb == m); 512 so->so_rcv.sb_mb = nextrecord; 513 SB_EMPTY_FIXUP(&so->so_rcv); 514 } 515 } 516 SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 2"); 517 SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 2"); 518 519 moff = 0; 520 offset = 0; 521 while (m && resid > 0 && error == 0) { 522 if (m->m_type == MT_OOBDATA) { 523 if (type != MT_OOBDATA) 524 break; 525 } else if (type == MT_OOBDATA) 526 break; 527 #ifdef DIAGNOSTIC 528 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) 529 panic("receive 3"); 530 #endif 531 so->so_state &= ~SS_RCVATMARK; 532 len = resid; 533 if (so->so_oobmark && len > so->so_oobmark - offset) 534 len = so->so_oobmark - offset; 535 if (len > m->m_len - moff) 536 len = m->m_len - moff; 537 /* 538 * If mp is set, just pass back the mbufs. 539 * Otherwise copy them out via the uio, then free. 540 * Sockbuf must be consistent here (points to current mbuf, 541 * it points to next record) when we drop priority; 542 * we must note any additions to the sockbuf when we 543 * block interrupts again. 544 */ 545 resid -= len; 546 if (len == m->m_len - moff) { 547 if (m->m_flags & M_EOR) 548 flags |= MSG_EOR; 549 if (flags & MSG_PEEK) { 550 m = m->m_next; 551 moff = 0; 552 } else { 553 nextrecord = m->m_nextpkt; 554 sbfree(&so->so_rcv, m); 555 if (mp) { 556 *mp = m; 557 mp = &m->m_next; 558 so->so_rcv.sb_mb = m = m->m_next; 559 *mp = NULL; 560 } else { 561 m = so->so_rcv.sb_mb = m_free(m); 562 } 563 /* 564 * If m != NULL, we also know that 565 * so->so_rcv.sb_mb != NULL. 566 */ 567 KASSERT(so->so_rcv.sb_mb == m); 568 if (m) { 569 m->m_nextpkt = nextrecord; 570 if (nextrecord == NULL) 571 so->so_rcv.sb_lastrecord = m; 572 } else { 573 so->so_rcv.sb_mb = nextrecord; 574 SB_EMPTY_FIXUP(&so->so_rcv); 575 } 576 SBLASTRECORDCHK(&so->so_rcv, 577 "kttcp_soreceive 3"); 578 SBLASTMBUFCHK(&so->so_rcv, 579 "kttcp_soreceive 3"); 580 } 581 } else { 582 if (flags & MSG_PEEK) 583 moff += len; 584 else { 585 if (mp) { 586 sounlock(so); 587 *mp = m_copym(m, 0, len, M_WAIT); 588 solock(so); 589 } 590 m->m_data += len; 591 m->m_len -= len; 592 so->so_rcv.sb_cc -= len; 593 } 594 } 595 if (so->so_oobmark) { 596 if ((flags & MSG_PEEK) == 0) { 597 so->so_oobmark -= len; 598 if (so->so_oobmark == 0) { 599 so->so_state |= SS_RCVATMARK; 600 break; 601 } 602 } else { 603 offset += len; 604 if (offset == so->so_oobmark) 605 break; 606 } 607 } 608 if (flags & MSG_EOR) 609 break; 610 /* 611 * If the MSG_WAITALL flag is set (for non-atomic socket), 612 * we must not quit until "uio->uio_resid == 0" or an error 613 * termination. If a signal/timeout occurs, return 614 * with a short count but without error. 615 * Keep sockbuf locked against other readers. 616 */ 617 while (flags & MSG_WAITALL && m == NULL && resid > 0 && 618 !sosendallatonce(so) && !nextrecord) { 619 if (so->so_error || so->so_state & SS_CANTRCVMORE) 620 break; 621 /* 622 * If we are peeking and the socket receive buffer is 623 * full, stop since we can't get more data to peek at. 624 */ 625 if ((flags & MSG_PEEK) && sbspace(&so->so_rcv) <= 0) 626 break; 627 /* 628 * If we've drained the socket buffer, tell the 629 * protocol in case it needs to do something to 630 * get it filled again. 631 */ 632 if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb) { 633 (*pr->pr_usrreqs->pr_rcvd)(so, flags, l); 634 } 635 SBLASTRECORDCHK(&so->so_rcv, 636 "kttcp_soreceive sbwait 2"); 637 SBLASTMBUFCHK(&so->so_rcv, 638 "kttcp_soreceive sbwait 2"); 639 error = sbwait(&so->so_rcv); 640 if (error) { 641 sbunlock(&so->so_rcv); 642 sounlock(so); 643 return (0); 644 } 645 if ((m = so->so_rcv.sb_mb) != NULL) 646 nextrecord = m->m_nextpkt; 647 } 648 } 649 650 if (m && pr->pr_flags & PR_ATOMIC) { 651 flags |= MSG_TRUNC; 652 if ((flags & MSG_PEEK) == 0) 653 (void) sbdroprecord(&so->so_rcv); 654 } 655 if ((flags & MSG_PEEK) == 0) { 656 if (m == NULL) { 657 /* 658 * First part is an SB_EMPTY_FIXUP(). Second part 659 * makes sure sb_lastrecord is up-to-date if 660 * there is still data in the socket buffer. 661 */ 662 so->so_rcv.sb_mb = nextrecord; 663 if (so->so_rcv.sb_mb == NULL) { 664 so->so_rcv.sb_mbtail = NULL; 665 so->so_rcv.sb_lastrecord = NULL; 666 } else if (nextrecord->m_nextpkt == NULL) 667 so->so_rcv.sb_lastrecord = nextrecord; 668 } 669 SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 4"); 670 SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 4"); 671 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) { 672 (*pr->pr_usrreqs->pr_rcvd)(so, flags, l); 673 } 674 } 675 if (orig_resid == resid && orig_resid && 676 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 677 sbunlock(&so->so_rcv); 678 goto restart; 679 } 680 681 if (flagsp) 682 *flagsp |= flags; 683 release: 684 sbunlock(&so->so_rcv); 685 sounlock(so); 686 *done = slen - resid; 687 #if 0 688 printf("soreceive: error %d slen %llu resid %lld\n", error, slen, resid); 689 #endif 690 return (error); 691 } 692