1 /* $NetBSD: tcp_output.c,v 1.152 2006/11/23 23:12:59 martin Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 34 * 35 * NRL grants permission for redistribution and use in source and binary 36 * forms, with or without modification, of the software and documentation 37 * created at NRL provided that the following conditions are met: 38 * 39 * 1. 
Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgements: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * This product includes software developed at the Information 49 * Technology Division, US Naval Research Laboratory. 50 * 4. Neither the name of the NRL nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 55 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 56 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 57 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 58 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 59 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 60 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 61 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 62 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 63 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 64 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 65 * 66 * The views and conclusions contained in the software and documentation 67 * are those of the authors and should not be interpreted as representing 68 * official policies, either expressed or implied, of the US Naval 69 * Research Laboratory (NRL). 
70 */ 71 72 /*- 73 * Copyright (c) 1997, 1998, 2001, 2005, 2006 The NetBSD Foundation, Inc. 74 * All rights reserved. 75 * 76 * This code is derived from software contributed to The NetBSD Foundation 77 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 78 * Facility, NASA Ames Research Center. 79 * This code is derived from software contributed to The NetBSD Foundation 80 * by Charles M. Hannum. 81 * This code is derived from software contributed to The NetBSD Foundation 82 * by Rui Paulo. 83 * 84 * Redistribution and use in source and binary forms, with or without 85 * modification, are permitted provided that the following conditions 86 * are met: 87 * 1. Redistributions of source code must retain the above copyright 88 * notice, this list of conditions and the following disclaimer. 89 * 2. Redistributions in binary form must reproduce the above copyright 90 * notice, this list of conditions and the following disclaimer in the 91 * documentation and/or other materials provided with the distribution. 92 * 3. All advertising materials mentioning features or use of this software 93 * must display the following acknowledgement: 94 * This product includes software developed by the NetBSD 95 * Foundation, Inc. and its contributors. 96 * 4. Neither the name of The NetBSD Foundation nor the names of its 97 * contributors may be used to endorse or promote products derived 98 * from this software without specific prior written permission. 99 * 100 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 101 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 102 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 103 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 104 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 105 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 106 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 107 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 108 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 109 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 110 * POSSIBILITY OF SUCH DAMAGE. 111 */ 112 113 /* 114 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 115 * The Regents of the University of California. All rights reserved. 116 * 117 * Redistribution and use in source and binary forms, with or without 118 * modification, are permitted provided that the following conditions 119 * are met: 120 * 1. Redistributions of source code must retain the above copyright 121 * notice, this list of conditions and the following disclaimer. 122 * 2. Redistributions in binary form must reproduce the above copyright 123 * notice, this list of conditions and the following disclaimer in the 124 * documentation and/or other materials provided with the distribution. 125 * 3. Neither the name of the University nor the names of its contributors 126 * may be used to endorse or promote products derived from this software 127 * without specific prior written permission. 128 * 129 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 130 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 131 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 132 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 133 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 134 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 135 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 136 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 137 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 138 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 139 * SUCH DAMAGE. 140 * 141 * @(#)tcp_output.c 8.4 (Berkeley) 5/24/95 142 */ 143 144 #include <sys/cdefs.h> 145 __KERNEL_RCSID(0, "$NetBSD: tcp_output.c,v 1.152 2006/11/23 23:12:59 martin Exp $"); 146 147 #include "opt_inet.h" 148 #include "opt_ipsec.h" 149 #include "opt_tcp_debug.h" 150 151 #include <sys/param.h> 152 #include <sys/systm.h> 153 #include <sys/malloc.h> 154 #include <sys/mbuf.h> 155 #include <sys/protosw.h> 156 #include <sys/socket.h> 157 #include <sys/socketvar.h> 158 #include <sys/errno.h> 159 #include <sys/domain.h> 160 #include <sys/kernel.h> 161 #ifdef TCP_SIGNATURE 162 #include <sys/md5.h> 163 #endif 164 165 #include <net/if.h> 166 #include <net/route.h> 167 168 #include <netinet/in.h> 169 #include <netinet/in_systm.h> 170 #include <netinet/ip.h> 171 #include <netinet/in_pcb.h> 172 #include <netinet/ip_var.h> 173 174 #ifdef INET6 175 #ifndef INET 176 #include <netinet/in.h> 177 #endif 178 #include <netinet/ip6.h> 179 #include <netinet6/in6_var.h> 180 #include <netinet6/ip6_var.h> 181 #include <netinet6/in6_pcb.h> 182 #include <netinet6/nd6.h> 183 #endif 184 185 #ifdef FAST_IPSEC 186 #include <netipsec/ipsec.h> 187 #include <netipsec/key.h> 188 #endif /* FAST_IPSEC*/ 189 #ifdef IPSEC 190 #include <netinet6/ipsec.h> 191 #endif 192 193 #include <netinet/tcp.h> 194 #define TCPOUTFLAGS 195 #include <netinet/tcp_fsm.h> 196 #include <netinet/tcp_seq.h> 197 #include <netinet/tcp_timer.h> 198 #include <netinet/tcp_var.h> 199 #include 
<netinet/tcp_congctl.h> 200 #include <netinet/tcpip.h> 201 #include <netinet/tcp_debug.h> 202 #include <netinet/in_offload.h> 203 #include <netinet6/in6_offload.h> 204 205 #ifdef IPSEC 206 #include <netkey/key.h> 207 #endif 208 209 #ifdef notyet 210 extern struct mbuf *m_copypack(); 211 #endif 212 213 /* 214 * Knob to enable Congestion Window Monitoring, and control 215 * the burst size it allows. Default burst is 4 packets, per 216 * the Internet draft. 217 */ 218 int tcp_cwm = 0; 219 int tcp_cwm_burstsize = 4; 220 221 #ifdef TCP_OUTPUT_COUNTERS 222 #include <sys/device.h> 223 224 extern struct evcnt tcp_output_bigheader; 225 extern struct evcnt tcp_output_predict_hit; 226 extern struct evcnt tcp_output_predict_miss; 227 extern struct evcnt tcp_output_copysmall; 228 extern struct evcnt tcp_output_copybig; 229 extern struct evcnt tcp_output_refbig; 230 231 #define TCP_OUTPUT_COUNTER_INCR(ev) (ev)->ev_count++ 232 #else 233 234 #define TCP_OUTPUT_COUNTER_INCR(ev) /* nothing */ 235 236 #endif /* TCP_OUTPUT_COUNTERS */ 237 238 static 239 #ifndef GPROF 240 inline 241 #endif 242 int 243 tcp_segsize(struct tcpcb *tp, int *txsegsizep, int *rxsegsizep, 244 boolean_t *alwaysfragp) 245 { 246 #ifdef INET 247 struct inpcb *inp = tp->t_inpcb; 248 #endif 249 #ifdef INET6 250 struct in6pcb *in6p = tp->t_in6pcb; 251 #endif 252 struct socket *so = NULL; 253 struct rtentry *rt; 254 struct ifnet *ifp; 255 int size; 256 int hdrlen; 257 int optlen; 258 259 *alwaysfragp = FALSE; 260 261 #ifdef DIAGNOSTIC 262 if (tp->t_inpcb && tp->t_in6pcb) 263 panic("tcp_segsize: both t_inpcb and t_in6pcb are set"); 264 #endif 265 switch (tp->t_family) { 266 #ifdef INET 267 case AF_INET: 268 hdrlen = sizeof(struct ip) + sizeof(struct tcphdr); 269 break; 270 #endif 271 #ifdef INET6 272 case AF_INET6: 273 hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 274 break; 275 #endif 276 default: 277 size = tcp_mssdflt; 278 goto out; 279 } 280 281 rt = NULL; 282 #ifdef INET 283 if (inp) { 284 rt = 
in_pcbrtentry(inp); 285 so = inp->inp_socket; 286 } 287 #endif 288 #ifdef INET6 289 if (in6p) { 290 rt = in6_pcbrtentry(in6p); 291 so = in6p->in6p_socket; 292 } 293 #endif 294 if (rt == NULL) { 295 size = tcp_mssdflt; 296 goto out; 297 } 298 299 ifp = rt->rt_ifp; 300 301 size = tcp_mssdflt; 302 if (tp->t_mtudisc && rt->rt_rmx.rmx_mtu != 0) { 303 #ifdef INET6 304 if (in6p && rt->rt_rmx.rmx_mtu < IPV6_MMTU) { 305 /* 306 * RFC2460 section 5, last paragraph: if path MTU is 307 * smaller than 1280, use 1280 as packet size and 308 * attach fragment header. 309 */ 310 size = IPV6_MMTU - hdrlen - sizeof(struct ip6_frag); 311 *alwaysfragp = TRUE; 312 } else 313 size = rt->rt_rmx.rmx_mtu - hdrlen; 314 #else 315 size = rt->rt_rmx.rmx_mtu - hdrlen; 316 #endif 317 } else if (ifp->if_flags & IFF_LOOPBACK) 318 size = ifp->if_mtu - hdrlen; 319 #ifdef INET 320 else if (inp && tp->t_mtudisc) 321 size = ifp->if_mtu - hdrlen; 322 else if (inp && in_localaddr(inp->inp_faddr)) 323 size = ifp->if_mtu - hdrlen; 324 #endif 325 #ifdef INET6 326 else if (in6p) { 327 #ifdef INET 328 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) { 329 /* mapped addr case */ 330 struct in_addr d; 331 bcopy(&in6p->in6p_faddr.s6_addr32[3], &d, sizeof(d)); 332 if (tp->t_mtudisc || in_localaddr(d)) 333 size = ifp->if_mtu - hdrlen; 334 } else 335 #endif 336 { 337 /* 338 * for IPv6, path MTU discovery is always turned on, 339 * or the node must use packet size <= 1280. 340 */ 341 size = tp->t_mtudisc ? IN6_LINKMTU(ifp) : IPV6_MMTU; 342 size -= hdrlen; 343 } 344 } 345 #endif 346 out: 347 /* 348 * Now we must make room for whatever extra TCP/IP options are in 349 * the packet. 350 */ 351 optlen = tcp_optlen(tp); 352 353 /* 354 * XXX tp->t_ourmss should have the right size, but without this code 355 * fragmentation will occur... need more investigation 356 */ 357 #ifdef INET 358 if (inp) { 359 #if defined(IPSEC) || defined(FAST_IPSEC) 360 if (! 
IPSEC_PCB_SKIP_IPSEC(inp->inp_sp, IPSEC_DIR_OUTBOUND)) 361 optlen += ipsec4_hdrsiz_tcp(tp); 362 #endif 363 optlen += ip_optlen(inp); 364 } 365 #endif 366 #ifdef INET6 367 #ifdef INET 368 if (in6p && tp->t_family == AF_INET) { 369 #if defined(IPSEC) || defined(FAST_IPSEC) 370 if (! IPSEC_PCB_SKIP_IPSEC(in6p->in6p_sp, IPSEC_DIR_OUTBOUND)) 371 optlen += ipsec4_hdrsiz_tcp(tp); 372 #endif 373 /* XXX size -= ip_optlen(in6p); */ 374 } else 375 #endif 376 if (in6p && tp->t_family == AF_INET6) { 377 #ifdef IPSEC 378 if (! IPSEC_PCB_SKIP_IPSEC(in6p->in6p_sp, IPSEC_DIR_OUTBOUND)) 379 optlen += ipsec6_hdrsiz_tcp(tp); 380 #endif 381 optlen += ip6_optlen(in6p); 382 } 383 #endif 384 size -= optlen; 385 386 /* there may not be any room for data if mtu is too small */ 387 if (size < 0) 388 return (EMSGSIZE); 389 390 /* 391 * *rxsegsizep holds *estimated* inbound segment size (estimation 392 * assumes that path MTU is the same for both ways). this is only 393 * for silly window avoidance, do not use the value for other purposes. 394 * 395 * ipseclen is subtracted from both sides, this may not be right. 396 * I'm not quite sure about this (could someone comment). 397 */ 398 *txsegsizep = min(tp->t_peermss - optlen, size); 399 /* 400 * Never send more than half a buffer full. This insures that we can 401 * always keep 2 packets on the wire, no matter what SO_SNDBUF is, and 402 * therefore acks will never be delayed unless we run out of data to 403 * transmit. 404 */ 405 if (so) 406 *txsegsizep = min(so->so_snd.sb_hiwat >> 1, *txsegsizep); 407 *rxsegsizep = min(tp->t_ourmss - optlen, size); 408 409 if (*txsegsizep != tp->t_segsz) { 410 /* 411 * If the new segment size is larger, we don't want to 412 * mess up the congestion window, but if it is smaller 413 * we'll have to reduce the congestion window to ensure 414 * that we don't get into trouble with initial windows 415 * and the rest. 
In any case, if the segment size 416 * has changed, chances are the path has, too, and 417 * our congestion window will be different. 418 */ 419 if (*txsegsizep < tp->t_segsz) { 420 tp->snd_cwnd = max((tp->snd_cwnd / tp->t_segsz) 421 * *txsegsizep, *txsegsizep); 422 tp->snd_ssthresh = max((tp->snd_ssthresh / tp->t_segsz) 423 * *txsegsizep, *txsegsizep); 424 } 425 tp->t_segsz = *txsegsizep; 426 } 427 428 return (0); 429 } 430 431 static 432 #ifndef GPROF 433 inline 434 #endif 435 int 436 tcp_build_datapkt(struct tcpcb *tp, struct socket *so, int off, 437 long len, int hdrlen, struct mbuf **mp) 438 { 439 struct mbuf *m, *m0; 440 441 if (tp->t_force && len == 1) 442 tcpstat.tcps_sndprobe++; 443 else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { 444 tcpstat.tcps_sndrexmitpack++; 445 tcpstat.tcps_sndrexmitbyte += len; 446 } else { 447 tcpstat.tcps_sndpack++; 448 tcpstat.tcps_sndbyte += len; 449 } 450 #ifdef notyet 451 if ((m = m_copypack(so->so_snd.sb_mb, off, 452 (int)len, max_linkhdr + hdrlen)) == 0) 453 return (ENOBUFS); 454 /* 455 * m_copypack left space for our hdr; use it. 456 */ 457 m->m_len += hdrlen; 458 m->m_data -= hdrlen; 459 #else 460 MGETHDR(m, M_DONTWAIT, MT_HEADER); 461 if (__predict_false(m == NULL)) 462 return (ENOBUFS); 463 MCLAIM(m, &tcp_tx_mowner); 464 465 /* 466 * XXX Because other code assumes headers will fit in 467 * XXX one header mbuf. 468 * 469 * (This code should almost *never* be run.) 470 */ 471 if (__predict_false((max_linkhdr + hdrlen) > MHLEN)) { 472 TCP_OUTPUT_COUNTER_INCR(&tcp_output_bigheader); 473 MCLGET(m, M_DONTWAIT); 474 if ((m->m_flags & M_EXT) == 0) { 475 m_freem(m); 476 return (ENOBUFS); 477 } 478 } 479 480 m->m_data += max_linkhdr; 481 m->m_len = hdrlen; 482 483 /* 484 * To avoid traversing the whole sb_mb chain for correct 485 * data to send, remember last sent mbuf, its offset and 486 * the sent size. When called the next time, see if the 487 * data to send is directly following the previous transfer. 
488 * This is important for large TCP windows. 489 */ 490 if (off == 0 || tp->t_lastm == NULL || 491 (tp->t_lastoff + tp->t_lastlen) != off) { 492 TCP_OUTPUT_COUNTER_INCR(&tcp_output_predict_miss); 493 /* 494 * Either a new packet or a retransmit. 495 * Start from the beginning. 496 */ 497 tp->t_lastm = so->so_snd.sb_mb; 498 tp->t_inoff = off; 499 } else { 500 TCP_OUTPUT_COUNTER_INCR(&tcp_output_predict_hit); 501 tp->t_inoff += tp->t_lastlen; 502 } 503 504 /* Traverse forward to next packet */ 505 while (tp->t_inoff > 0) { 506 if (tp->t_lastm == NULL) 507 panic("tp->t_lastm == NULL"); 508 if (tp->t_inoff < tp->t_lastm->m_len) 509 break; 510 tp->t_inoff -= tp->t_lastm->m_len; 511 tp->t_lastm = tp->t_lastm->m_next; 512 } 513 514 tp->t_lastoff = off; 515 tp->t_lastlen = len; 516 m0 = tp->t_lastm; 517 off = tp->t_inoff; 518 519 if (len <= M_TRAILINGSPACE(m)) { 520 m_copydata(m0, off, (int) len, mtod(m, caddr_t) + hdrlen); 521 m->m_len += len; 522 TCP_OUTPUT_COUNTER_INCR(&tcp_output_copysmall); 523 } else { 524 m->m_next = m_copy(m0, off, (int) len); 525 if (m->m_next == NULL) { 526 m_freem(m); 527 return (ENOBUFS); 528 } 529 #ifdef TCP_OUTPUT_COUNTERS 530 if (m->m_next->m_flags & M_EXT) 531 TCP_OUTPUT_COUNTER_INCR(&tcp_output_refbig); 532 else 533 TCP_OUTPUT_COUNTER_INCR(&tcp_output_copybig); 534 #endif /* TCP_OUTPUT_COUNTERS */ 535 } 536 #endif 537 538 *mp = m; 539 return (0); 540 } 541 542 /* 543 * Tcp output routine: figure out what should be sent and send it. 
544 */ 545 int 546 tcp_output(struct tcpcb *tp) 547 { 548 struct socket *so; 549 struct route *ro; 550 long len, win; 551 int off, flags, error; 552 struct mbuf *m; 553 struct ip *ip; 554 #ifdef INET6 555 struct ip6_hdr *ip6; 556 #endif 557 struct tcphdr *th; 558 u_char opt[MAX_TCPOPTLEN]; 559 unsigned optlen, hdrlen, packetlen; 560 unsigned int sack_numblks; 561 int idle, sendalot, txsegsize, rxsegsize; 562 int txsegsize_nosack; 563 int maxburst = TCP_MAXBURST; 564 int af; /* address family on the wire */ 565 int iphdrlen; 566 int has_tso4, has_tso6; 567 int has_tso, use_tso; 568 boolean_t alwaysfrag; 569 int sack_rxmit; 570 int sack_bytes_rxmt; 571 struct sackhole *p; 572 #ifdef TCP_SIGNATURE 573 int sigoff = 0; 574 #endif 575 576 #ifdef DIAGNOSTIC 577 if (tp->t_inpcb && tp->t_in6pcb) 578 panic("tcp_output: both t_inpcb and t_in6pcb are set"); 579 #endif 580 so = NULL; 581 ro = NULL; 582 if (tp->t_inpcb) { 583 so = tp->t_inpcb->inp_socket; 584 ro = &tp->t_inpcb->inp_route; 585 } 586 #ifdef INET6 587 else if (tp->t_in6pcb) { 588 so = tp->t_in6pcb->in6p_socket; 589 ro = (struct route *)&tp->t_in6pcb->in6p_route; 590 } 591 #endif 592 593 switch (af = tp->t_family) { 594 #ifdef INET 595 case AF_INET: 596 if (tp->t_inpcb) 597 break; 598 #ifdef INET6 599 /* mapped addr case */ 600 if (tp->t_in6pcb) 601 break; 602 #endif 603 return (EINVAL); 604 #endif 605 #ifdef INET6 606 case AF_INET6: 607 if (tp->t_in6pcb) 608 break; 609 return (EINVAL); 610 #endif 611 default: 612 return (EAFNOSUPPORT); 613 } 614 615 if (tcp_segsize(tp, &txsegsize, &rxsegsize, &alwaysfrag)) 616 return (EMSGSIZE); 617 618 idle = (tp->snd_max == tp->snd_una); 619 620 /* 621 * Determine if we can use TCP segmentation offload: 622 * - If we're using IPv4 623 * - If there is not an IPsec policy that prevents it 624 * - If the interface can do it 625 */ 626 has_tso4 = has_tso6 = FALSE; 627 #if defined(INET) 628 has_tso4 = tp->t_inpcb != NULL && 629 #if defined(IPSEC) || defined(FAST_IPSEC) 630 
IPSEC_PCB_SKIP_IPSEC(tp->t_inpcb->inp_sp, 631 IPSEC_DIR_OUTBOUND) && 632 #endif 633 tp->t_inpcb->inp_route.ro_rt != NULL && 634 (tp->t_inpcb->inp_route.ro_rt->rt_ifp->if_capenable & 635 IFCAP_TSOv4) != 0; 636 #endif /* defined(INET) */ 637 #if defined(INET6) 638 has_tso6 = tp->t_in6pcb != NULL && 639 #if defined(IPSEC) || defined(FAST_IPSEC) 640 IPSEC_PCB_SKIP_IPSEC(tp->t_in6pcb->in6p_sp, 641 IPSEC_DIR_OUTBOUND) && 642 #endif 643 tp->t_in6pcb->in6p_route.ro_rt != NULL && 644 (tp->t_in6pcb->in6p_route.ro_rt->rt_ifp->if_capenable & 645 IFCAP_TSOv6) != 0; 646 #endif /* defined(INET6) */ 647 has_tso = (has_tso4 || has_tso6) && !alwaysfrag; 648 649 /* 650 * Restart Window computation. From draft-floyd-incr-init-win-03: 651 * 652 * Optionally, a TCP MAY set the restart window to the 653 * minimum of the value used for the initial window and 654 * the current value of cwnd (in other words, using a 655 * larger value for the restart window should never increase 656 * the size of cwnd). 657 */ 658 if (tcp_cwm) { 659 /* 660 * Hughes/Touch/Heidemann Congestion Window Monitoring. 661 * Count the number of packets currently pending 662 * acknowledgement, and limit our congestion window 663 * to a pre-determined allowed burst size plus that count. 664 * This prevents bursting once all pending packets have 665 * been acknowledged (i.e. transmission is idle). 666 * 667 * XXX Link this to Initial Window? 668 */ 669 tp->snd_cwnd = min(tp->snd_cwnd, 670 (tcp_cwm_burstsize * txsegsize) + 671 (tp->snd_nxt - tp->snd_una)); 672 } else { 673 if (idle && (tcp_now - tp->t_rcvtime) >= tp->t_rxtcur) { 674 /* 675 * We have been idle for "a while" and no acks are 676 * expected to clock out any data we send -- 677 * slow start to get ack "clock" running again. 
678 */ 679 int ss = tcp_init_win; 680 #ifdef INET 681 if (tp->t_inpcb && 682 in_localaddr(tp->t_inpcb->inp_faddr)) 683 ss = tcp_init_win_local; 684 #endif 685 #ifdef INET6 686 if (tp->t_in6pcb && 687 in6_localaddr(&tp->t_in6pcb->in6p_faddr)) 688 ss = tcp_init_win_local; 689 #endif 690 tp->snd_cwnd = min(tp->snd_cwnd, 691 TCP_INITIAL_WINDOW(ss, txsegsize)); 692 } 693 } 694 695 txsegsize_nosack = txsegsize; 696 again: 697 use_tso = has_tso; 698 if ((tp->t_flags & (TF_ECN_SND_CWR|TF_ECN_SND_ECE)) != 0) { 699 /* don't duplicate CWR/ECE. */ 700 use_tso = 0; 701 } 702 TCP_REASS_LOCK(tp); 703 sack_numblks = tcp_sack_numblks(tp); 704 if (sack_numblks) { 705 int sackoptlen; 706 707 sackoptlen = TCP_SACK_OPTLEN(sack_numblks); 708 if (sackoptlen > txsegsize_nosack) { 709 sack_numblks = 0; /* give up SACK */ 710 txsegsize = txsegsize_nosack; 711 } else { 712 if ((tp->rcv_sack_flags & TCPSACK_HAVED) != 0) { 713 /* don't duplicate D-SACK. */ 714 use_tso = 0; 715 } 716 txsegsize = txsegsize_nosack - sackoptlen; 717 } 718 } else { 719 txsegsize = txsegsize_nosack; 720 } 721 722 /* 723 * Determine length of data that should be transmitted, and 724 * flags that should be used. If there is some data or critical 725 * controls (SYN, RST) to send, then transmit; otherwise, 726 * investigate further. 727 * 728 * Readjust SACK information to avoid resending duplicate data. 729 */ 730 if (TCP_SACK_ENABLED(tp) && SEQ_LT(tp->snd_nxt, tp->snd_max)) 731 tcp_sack_adjust(tp); 732 sendalot = 0; 733 off = tp->snd_nxt - tp->snd_una; 734 win = min(tp->snd_wnd, tp->snd_cwnd); 735 736 flags = tcp_outflags[tp->t_state]; 737 738 /* 739 * Send any SACK-generated retransmissions. If we're explicitly trying 740 * to send out new data (when sendalot is 1), bypass this function. 741 * If we retransmit in fast recovery mode, decrement snd_cwnd, since 742 * we're replacing a (future) new transmission with a retransmission 743 * now, and we previously incremented snd_cwnd in tcp_input(). 
744 */ 745 /* 746 * Still in sack recovery , reset rxmit flag to zero. 747 */ 748 sack_rxmit = 0; 749 sack_bytes_rxmt = 0; 750 len = 0; 751 p = NULL; 752 do { 753 long cwin; 754 if (!TCP_SACK_ENABLED(tp)) 755 break; 756 if (tp->t_partialacks < 0) 757 break; 758 p = tcp_sack_output(tp, &sack_bytes_rxmt); 759 if (p == NULL) 760 break; 761 762 cwin = min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt; 763 if (cwin < 0) 764 cwin = 0; 765 /* Do not retransmit SACK segments beyond snd_recover */ 766 if (SEQ_GT(p->end, tp->snd_recover)) { 767 /* 768 * (At least) part of sack hole extends beyond 769 * snd_recover. Check to see if we can rexmit data 770 * for this hole. 771 */ 772 if (SEQ_GEQ(p->rxmit, tp->snd_recover)) { 773 /* 774 * Can't rexmit any more data for this hole. 775 * That data will be rexmitted in the next 776 * sack recovery episode, when snd_recover 777 * moves past p->rxmit. 778 */ 779 p = NULL; 780 break; 781 } 782 /* Can rexmit part of the current hole */ 783 len = ((long)ulmin(cwin, tp->snd_recover - p->rxmit)); 784 } else 785 len = ((long)ulmin(cwin, p->end - p->rxmit)); 786 off = p->rxmit - tp->snd_una; 787 if (off + len > so->so_snd.sb_cc) { 788 /* 1 for TH_FIN */ 789 KASSERT(off + len == so->so_snd.sb_cc + 1); 790 KASSERT(p->rxmit + len == tp->snd_max); 791 len = so->so_snd.sb_cc - off; 792 } 793 if (len > 0) { 794 sack_rxmit = 1; 795 sendalot = 1; 796 } 797 } while (/*CONSTCOND*/0); 798 799 /* 800 * If in persist timeout with window of 0, send 1 byte. 801 * Otherwise, if window is small but nonzero 802 * and timer expired, we will send what we can 803 * and go to transmit state. 804 */ 805 if (tp->t_force) { 806 if (win == 0) { 807 /* 808 * If we still have some data to send, then 809 * clear the FIN bit. Usually this would 810 * happen below when it realizes that we 811 * aren't sending all the data. 
However, 812 * if we have exactly 1 byte of unset data, 813 * then it won't clear the FIN bit below, 814 * and if we are in persist state, we wind 815 * up sending the packet without recording 816 * that we sent the FIN bit. 817 * 818 * We can't just blindly clear the FIN bit, 819 * because if we don't have any more data 820 * to send then the probe will be the FIN 821 * itself. 822 */ 823 if (off < so->so_snd.sb_cc) 824 flags &= ~TH_FIN; 825 win = 1; 826 } else { 827 TCP_TIMER_DISARM(tp, TCPT_PERSIST); 828 tp->t_rxtshift = 0; 829 } 830 } 831 832 if (sack_rxmit == 0) { 833 if (TCP_SACK_ENABLED(tp) && tp->t_partialacks >= 0) { 834 long cwin; 835 836 /* 837 * We are inside of a SACK recovery episode and are 838 * sending new data, having retransmitted all the 839 * data possible in the scoreboard. 840 */ 841 if (tp->snd_wnd < so->so_snd.sb_cc) { 842 len = tp->snd_wnd - off; 843 flags &= ~TH_FIN; 844 } else { 845 len = so->so_snd.sb_cc - off; 846 } 847 848 /* 849 * From FreeBSD: 850 * Don't remove this (len > 0) check ! 851 * We explicitly check for len > 0 here (although it 852 * isn't really necessary), to work around a gcc 853 * optimization issue - to force gcc to compute 854 * len above. Without this check, the computation 855 * of len is bungled by the optimizer. 856 */ 857 if (len > 0) { 858 cwin = tp->snd_cwnd - 859 (tp->snd_nxt - tp->sack_newdata) - 860 sack_bytes_rxmt; 861 if (cwin < 0) 862 cwin = 0; 863 if (cwin < len) { 864 len = cwin; 865 flags &= ~TH_FIN; 866 } 867 } 868 } else if (win < so->so_snd.sb_cc) { 869 len = win - off; 870 flags &= ~TH_FIN; 871 } else { 872 len = so->so_snd.sb_cc - off; 873 } 874 } 875 876 if (len < 0) { 877 /* 878 * If FIN has been sent but not acked, 879 * but we haven't been called to retransmit, 880 * len will be -1. Otherwise, window shrank 881 * after we sent into it. 
If window shrank to 0, 882 * cancel pending retransmit, pull snd_nxt back 883 * to (closed) window, and set the persist timer 884 * if it isn't already going. If the window didn't 885 * close completely, just wait for an ACK. 886 * 887 * If we have a pending FIN, either it has already been 888 * transmitted or it is outside the window, so drop it. 889 * If the FIN has been transmitted, but this is not a 890 * retransmission, then len must be -1. Therefore we also 891 * prevent here the sending of `gratuitous FINs'. This 892 * eliminates the need to check for that case below (e.g. 893 * to back up snd_nxt before the FIN so that the sequence 894 * number is correct). 895 */ 896 len = 0; 897 flags &= ~TH_FIN; 898 if (win == 0) { 899 TCP_TIMER_DISARM(tp, TCPT_REXMT); 900 tp->t_rxtshift = 0; 901 tp->snd_nxt = tp->snd_una; 902 if (TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0) 903 tcp_setpersist(tp); 904 } 905 } 906 if (len > txsegsize) { 907 if (use_tso) { 908 /* 909 * Truncate TSO transfers to IP_MAXPACKET, and make 910 * sure that we send equal size transfers down the 911 * stack (rather than big-small-big-small-...). 912 */ 913 #ifdef INET6 914 #if IPV6_MAXPACKET != IP_MAXPACKET 915 #error IPV6_MAXPACKET != IP_MAXPACKET 916 #endif 917 #endif 918 len = (min(len, IP_MAXPACKET) / txsegsize) * txsegsize; 919 if (len <= txsegsize) { 920 use_tso = 0; 921 } 922 } else 923 len = txsegsize; 924 flags &= ~TH_FIN; 925 sendalot = 1; 926 } else 927 use_tso = 0; 928 if (sack_rxmit) { 929 if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc)) 930 flags &= ~TH_FIN; 931 } 932 933 win = sbspace(&so->so_rcv); 934 935 /* 936 * Sender silly window avoidance. If connection is idle 937 * and can send all data, a maximum segment, 938 * at least a maximum default-size segment do it, 939 * or are forced, do it; otherwise don't bother. 940 * If peer's buffer is tiny, then send 941 * when window is at least half open. 
942 * If retransmitting (possibly after persist timer forced us 943 * to send into a small window), then must resend. 944 */ 945 if (len) { 946 if (len >= txsegsize) 947 goto send; 948 if ((so->so_state & SS_MORETOCOME) == 0 && 949 ((idle || tp->t_flags & TF_NODELAY) && 950 len + off >= so->so_snd.sb_cc)) 951 goto send; 952 if (tp->t_force) 953 goto send; 954 if (len >= tp->max_sndwnd / 2) 955 goto send; 956 if (SEQ_LT(tp->snd_nxt, tp->snd_max)) 957 goto send; 958 if (sack_rxmit) 959 goto send; 960 } 961 962 /* 963 * Compare available window to amount of window known to peer 964 * (as advertised window less next expected input). If the 965 * difference is at least twice the size of the largest segment 966 * we expect to receive (i.e. two segments) or at least 50% of 967 * the maximum possible window, then want to send a window update 968 * to peer. 969 */ 970 if (win > 0) { 971 /* 972 * "adv" is the amount we can increase the window, 973 * taking into account that we are limited by 974 * TCP_MAXWIN << tp->rcv_scale. 975 */ 976 long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) - 977 (tp->rcv_adv - tp->rcv_nxt); 978 979 if (adv >= (long) (2 * rxsegsize)) 980 goto send; 981 if (2 * adv >= (long) so->so_rcv.sb_hiwat) 982 goto send; 983 } 984 985 /* 986 * Send if we owe peer an ACK. 987 */ 988 if (tp->t_flags & TF_ACKNOW) 989 goto send; 990 if (flags & (TH_SYN|TH_FIN|TH_RST)) 991 goto send; 992 if (SEQ_GT(tp->snd_up, tp->snd_una)) 993 goto send; 994 /* 995 * In SACK, it is possible for tcp_output to fail to send a segment 996 * after the retransmission timer has been turned off. Make sure 997 * that the retransmission timer is set. 
998 */ 999 if (TCP_SACK_ENABLED(tp) && SEQ_GT(tp->snd_max, tp->snd_una) && 1000 !TCP_TIMER_ISARMED(tp, TCPT_REXMT) && 1001 !TCP_TIMER_ISARMED(tp, TCPT_PERSIST)) { 1002 TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur); 1003 goto just_return; 1004 } 1005 1006 /* 1007 * TCP window updates are not reliable, rather a polling protocol 1008 * using ``persist'' packets is used to insure receipt of window 1009 * updates. The three ``states'' for the output side are: 1010 * idle not doing retransmits or persists 1011 * persisting to move a small or zero window 1012 * (re)transmitting and thereby not persisting 1013 * 1014 * tp->t_timer[TCPT_PERSIST] 1015 * is set when we are in persist state. 1016 * tp->t_force 1017 * is set when we are called to send a persist packet. 1018 * tp->t_timer[TCPT_REXMT] 1019 * is set when we are retransmitting 1020 * The output side is idle when both timers are zero. 1021 * 1022 * If send window is too small, there is data to transmit, and no 1023 * retransmit or persist is pending, then go to persist state. 1024 * If nothing happens soon, send when timer expires: 1025 * if window is nonzero, transmit what we can, 1026 * otherwise force out a byte. 1027 */ 1028 if (so->so_snd.sb_cc && TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0 && 1029 TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0) { 1030 tp->t_rxtshift = 0; 1031 tcp_setpersist(tp); 1032 } 1033 1034 /* 1035 * No reason to send a segment, just return. 1036 */ 1037 just_return: 1038 TCP_REASS_UNLOCK(tp); 1039 return (0); 1040 1041 send: 1042 /* 1043 * Before ESTABLISHED, force sending of initial options 1044 * unless TCP set not to do any options. 1045 * NOTE: we assume that the IP/TCP header plus TCP options 1046 * always fit in a single mbuf, leaving room for a maximum 1047 * link header, i.e. 
1048 * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MCLBYTES 1049 */ 1050 optlen = 0; 1051 switch (af) { 1052 #ifdef INET 1053 case AF_INET: 1054 iphdrlen = sizeof(struct ip) + sizeof(struct tcphdr); 1055 break; 1056 #endif 1057 #ifdef INET6 1058 case AF_INET6: 1059 iphdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 1060 break; 1061 #endif 1062 default: /*pacify gcc*/ 1063 iphdrlen = 0; 1064 break; 1065 } 1066 hdrlen = iphdrlen; 1067 if (flags & TH_SYN) { 1068 struct rtentry *rt; 1069 1070 rt = NULL; 1071 #ifdef INET 1072 if (tp->t_inpcb) 1073 rt = in_pcbrtentry(tp->t_inpcb); 1074 #endif 1075 #ifdef INET6 1076 if (tp->t_in6pcb) 1077 rt = in6_pcbrtentry(tp->t_in6pcb); 1078 #endif 1079 1080 tp->snd_nxt = tp->iss; 1081 tp->t_ourmss = tcp_mss_to_advertise(rt != NULL ? 1082 rt->rt_ifp : NULL, af); 1083 if ((tp->t_flags & TF_NOOPT) == 0) { 1084 opt[0] = TCPOPT_MAXSEG; 1085 opt[1] = 4; 1086 opt[2] = (tp->t_ourmss >> 8) & 0xff; 1087 opt[3] = tp->t_ourmss & 0xff; 1088 optlen = 4; 1089 1090 if ((tp->t_flags & TF_REQ_SCALE) && 1091 ((flags & TH_ACK) == 0 || 1092 (tp->t_flags & TF_RCVD_SCALE))) { 1093 *((u_int32_t *) (opt + optlen)) = htonl( 1094 TCPOPT_NOP << 24 | 1095 TCPOPT_WINDOW << 16 | 1096 TCPOLEN_WINDOW << 8 | 1097 tp->request_r_scale); 1098 optlen += 4; 1099 } 1100 if (tcp_do_sack) { 1101 u_int8_t *cp = (u_int8_t *)(opt + optlen); 1102 1103 cp[0] = TCPOPT_SACK_PERMITTED; 1104 cp[1] = 2; 1105 cp[2] = TCPOPT_NOP; 1106 cp[3] = TCPOPT_NOP; 1107 optlen += 4; 1108 } 1109 } 1110 } 1111 1112 /* 1113 * Send a timestamp and echo-reply if this is a SYN and our side 1114 * wants to use timestamps (TF_REQ_TSTMP is set) or both our side 1115 * and our peer have sent timestamps in our SYN's. 
1116 */ 1117 if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && 1118 (flags & TH_RST) == 0 && 1119 ((flags & (TH_SYN|TH_ACK)) == TH_SYN || 1120 (tp->t_flags & TF_RCVD_TSTMP))) { 1121 u_int32_t *lp = (u_int32_t *)(opt + optlen); 1122 1123 /* Form timestamp option as shown in appendix A of RFC 1323. */ 1124 *lp++ = htonl(TCPOPT_TSTAMP_HDR); 1125 *lp++ = htonl(TCP_TIMESTAMP(tp)); 1126 *lp = htonl(tp->ts_recent); 1127 optlen += TCPOLEN_TSTAMP_APPA; 1128 } 1129 1130 /* 1131 * Tack on the SACK block if it is necessary. 1132 */ 1133 if (sack_numblks) { 1134 int sack_len; 1135 u_char *bp = (u_char *)(opt + optlen); 1136 u_int32_t *lp = (u_int32_t *)(bp + 4); 1137 struct ipqent *tiqe; 1138 1139 sack_len = sack_numblks * 8 + 2; 1140 bp[0] = TCPOPT_NOP; 1141 bp[1] = TCPOPT_NOP; 1142 bp[2] = TCPOPT_SACK; 1143 bp[3] = sack_len; 1144 if ((tp->rcv_sack_flags & TCPSACK_HAVED) != 0) { 1145 sack_numblks--; 1146 *lp++ = htonl(tp->rcv_dsack_block.left); 1147 *lp++ = htonl(tp->rcv_dsack_block.right); 1148 tp->rcv_sack_flags &= ~TCPSACK_HAVED; 1149 } 1150 for (tiqe = TAILQ_FIRST(&tp->timeq); 1151 sack_numblks > 0; tiqe = TAILQ_NEXT(tiqe, ipqe_timeq)) { 1152 KASSERT(tiqe != NULL); 1153 sack_numblks--; 1154 *lp++ = htonl(tiqe->ipqe_seq); 1155 *lp++ = htonl(tiqe->ipqe_seq + tiqe->ipqe_len + 1156 ((tiqe->ipqe_flags & TH_FIN) != 0 ? 1 : 0)); 1157 } 1158 optlen += sack_len + 2; 1159 } 1160 TCP_REASS_UNLOCK(tp); 1161 1162 #ifdef TCP_SIGNATURE 1163 #if defined(INET6) && defined(FAST_IPSEC) 1164 if (tp->t_family == AF_INET) 1165 #endif 1166 if (tp->t_flags & TF_SIGNATURE) { 1167 u_char *bp; 1168 /* 1169 * Initialize TCP-MD5 option (RFC2385) 1170 */ 1171 bp = (u_char *)opt + optlen; 1172 *bp++ = TCPOPT_SIGNATURE; 1173 *bp++ = TCPOLEN_SIGNATURE; 1174 sigoff = optlen + 2; 1175 bzero(bp, TCP_SIGLEN); 1176 bp += TCP_SIGLEN; 1177 optlen += TCPOLEN_SIGNATURE; 1178 /* 1179 * Terminate options list and maintain 32-bit alignment. 
1180 */ 1181 *bp++ = TCPOPT_NOP; 1182 *bp++ = TCPOPT_EOL; 1183 optlen += 2; 1184 } 1185 #endif /* TCP_SIGNATURE */ 1186 1187 hdrlen += optlen; 1188 1189 #ifdef DIAGNOSTIC 1190 if (!use_tso && len > txsegsize) 1191 panic("tcp data to be sent is larger than segment"); 1192 else if (use_tso && len > IP_MAXPACKET) 1193 panic("tcp data to be sent is larger than max TSO size"); 1194 if (max_linkhdr + hdrlen > MCLBYTES) 1195 panic("tcphdr too big"); 1196 #endif 1197 1198 /* 1199 * Grab a header mbuf, attaching a copy of data to 1200 * be transmitted, and initialize the header from 1201 * the template for sends on this connection. 1202 */ 1203 if (len) { 1204 error = tcp_build_datapkt(tp, so, off, len, hdrlen, &m); 1205 if (error) 1206 goto out; 1207 /* 1208 * If we're sending everything we've got, set PUSH. 1209 * (This will keep happy those implementations which only 1210 * give data to the user when a buffer fills or 1211 * a PUSH comes in.) 1212 */ 1213 if (off + len == so->so_snd.sb_cc) 1214 flags |= TH_PUSH; 1215 } else { 1216 if (tp->t_flags & TF_ACKNOW) 1217 tcpstat.tcps_sndacks++; 1218 else if (flags & (TH_SYN|TH_FIN|TH_RST)) 1219 tcpstat.tcps_sndctrl++; 1220 else if (SEQ_GT(tp->snd_up, tp->snd_una)) 1221 tcpstat.tcps_sndurg++; 1222 else 1223 tcpstat.tcps_sndwinup++; 1224 1225 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1226 if (m != NULL && max_linkhdr + hdrlen > MHLEN) { 1227 MCLGET(m, M_DONTWAIT); 1228 if ((m->m_flags & M_EXT) == 0) { 1229 m_freem(m); 1230 m = NULL; 1231 } 1232 } 1233 if (m == NULL) { 1234 error = ENOBUFS; 1235 goto out; 1236 } 1237 MCLAIM(m, &tcp_tx_mowner); 1238 m->m_data += max_linkhdr; 1239 m->m_len = hdrlen; 1240 } 1241 m->m_pkthdr.rcvif = (struct ifnet *)0; 1242 switch (af) { 1243 #ifdef INET 1244 case AF_INET: 1245 ip = mtod(m, struct ip *); 1246 #ifdef INET6 1247 ip6 = NULL; 1248 #endif 1249 th = (struct tcphdr *)(ip + 1); 1250 break; 1251 #endif 1252 #ifdef INET6 1253 case AF_INET6: 1254 ip = NULL; 1255 ip6 = mtod(m, struct ip6_hdr *); 1256 th 
= (struct tcphdr *)(ip6 + 1); 1257 break; 1258 #endif 1259 default: /*pacify gcc*/ 1260 ip = NULL; 1261 #ifdef INET6 1262 ip6 = NULL; 1263 #endif 1264 th = NULL; 1265 break; 1266 } 1267 if (tp->t_template == 0) 1268 panic("tcp_output"); 1269 if (tp->t_template->m_len < iphdrlen) 1270 panic("tcp_output"); 1271 bcopy(mtod(tp->t_template, caddr_t), mtod(m, caddr_t), iphdrlen); 1272 1273 /* 1274 * If we are starting a connection, send ECN setup 1275 * SYN packet. If we are on a retransmit, we may 1276 * resend those bits a number of times as per 1277 * RFC 3168. 1278 */ 1279 if (tp->t_state == TCPS_SYN_SENT && tcp_do_ecn) { 1280 if (tp->t_flags & TF_SYN_REXMT) { 1281 if (tp->t_ecn_retries--) 1282 flags |= TH_ECE|TH_CWR; 1283 } else { 1284 flags |= TH_ECE|TH_CWR; 1285 tp->t_ecn_retries = tcp_ecn_maxretries; 1286 } 1287 } 1288 1289 if (TCP_ECN_ALLOWED(tp)) { 1290 /* 1291 * If the peer has ECN, mark data packets 1292 * ECN capable. Ignore pure ack packets, retransmissions 1293 * and window probes. 1294 */ 1295 if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && 1296 !(tp->t_force && len == 1)) { 1297 switch (af) { 1298 #ifdef INET 1299 case AF_INET: 1300 tp->t_inpcb->inp_ip.ip_tos |= IPTOS_ECN_ECT0; 1301 break; 1302 #endif 1303 #ifdef INET6 1304 case AF_INET6: 1305 ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); 1306 break; 1307 #endif 1308 } 1309 tcpstat.tcps_ecn_ect++; 1310 } 1311 1312 /* 1313 * Reply with proper ECN notifications. 1314 */ 1315 if (tp->t_flags & TF_ECN_SND_CWR) { 1316 flags |= TH_CWR; 1317 tp->t_flags &= ~TF_ECN_SND_CWR; 1318 } 1319 if (tp->t_flags & TF_ECN_SND_ECE) { 1320 flags |= TH_ECE; 1321 } 1322 } 1323 1324 1325 /* 1326 * If we are doing retransmissions, then snd_nxt will 1327 * not reflect the first unsent octet. For ACK only 1328 * packets, we do not want the sequence number of the 1329 * retransmitted packet, we want the sequence number 1330 * of the next unsent octet. 
So, if there is no data 1331 * (and no SYN or FIN), use snd_max instead of snd_nxt 1332 * when filling in ti_seq. But if we are in persist 1333 * state, snd_max might reflect one byte beyond the 1334 * right edge of the window, so use snd_nxt in that 1335 * case, since we know we aren't doing a retransmission. 1336 * (retransmit and persist are mutually exclusive...) 1337 */ 1338 if (TCP_SACK_ENABLED(tp) && sack_rxmit) { 1339 th->th_seq = htonl(p->rxmit); 1340 p->rxmit += len; 1341 } else { 1342 if (len || (flags & (TH_SYN|TH_FIN)) || 1343 TCP_TIMER_ISARMED(tp, TCPT_PERSIST)) 1344 th->th_seq = htonl(tp->snd_nxt); 1345 else 1346 th->th_seq = htonl(tp->snd_max); 1347 } 1348 th->th_ack = htonl(tp->rcv_nxt); 1349 if (optlen) { 1350 bcopy((caddr_t)opt, (caddr_t)(th + 1), optlen); 1351 th->th_off = (sizeof (struct tcphdr) + optlen) >> 2; 1352 } 1353 th->th_flags = flags; 1354 /* 1355 * Calculate receive window. Don't shrink window, 1356 * but avoid silly window syndrome. 1357 */ 1358 if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)rxsegsize) 1359 win = 0; 1360 if (win > (long)TCP_MAXWIN << tp->rcv_scale) 1361 win = (long)TCP_MAXWIN << tp->rcv_scale; 1362 if (win < (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt)) 1363 win = (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt); 1364 th->th_win = htons((u_int16_t) (win>>tp->rcv_scale)); 1365 if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { 1366 u_int32_t urp = tp->snd_up - tp->snd_nxt; 1367 if (urp > IP_MAXPACKET) 1368 urp = IP_MAXPACKET; 1369 th->th_urp = htons((u_int16_t)urp); 1370 th->th_flags |= TH_URG; 1371 } else 1372 /* 1373 * If no urgent pointer to send, then we pull 1374 * the urgent pointer to the left edge of the send window 1375 * so that it doesn't drift into the send window on sequence 1376 * number wraparound. 
1377 */ 1378 tp->snd_up = tp->snd_una; /* drag it along */ 1379 1380 #ifdef TCP_SIGNATURE 1381 #if defined(INET6) && defined(FAST_IPSEC) 1382 if (tp->t_family == AF_INET) /* XXX */ 1383 #endif 1384 if (sigoff && (tp->t_flags & TF_SIGNATURE)) { 1385 struct secasvar *sav; 1386 u_int8_t *sigp; 1387 1388 sav = tcp_signature_getsav(m, th); 1389 1390 if (sav == NULL) { 1391 if (m) 1392 m_freem(m); 1393 return (EPERM); 1394 } 1395 1396 m->m_pkthdr.len = hdrlen + len; 1397 sigp = (caddr_t)th + sizeof(*th) + sigoff; 1398 tcp_signature(m, th, (caddr_t)th - mtod(m, caddr_t), sav, sigp); 1399 1400 key_sa_recordxfer(sav, m); 1401 #ifdef FAST_IPSEC 1402 KEY_FREESAV(&sav); 1403 #else 1404 key_freesav(sav); 1405 #endif 1406 } 1407 #endif 1408 1409 /* 1410 * Set ourselves up to be checksummed just before the packet 1411 * hits the wire. 1412 */ 1413 switch (af) { 1414 #ifdef INET 1415 case AF_INET: 1416 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1417 if (use_tso) { 1418 m->m_pkthdr.segsz = txsegsize; 1419 m->m_pkthdr.csum_flags = M_CSUM_TSOv4; 1420 } else { 1421 m->m_pkthdr.csum_flags = M_CSUM_TCPv4; 1422 if (len + optlen) { 1423 /* Fixup the pseudo-header checksum. */ 1424 /* XXXJRT Not IP Jumbogram safe. */ 1425 th->th_sum = in_cksum_addword(th->th_sum, 1426 htons((u_int16_t) (len + optlen))); 1427 } 1428 } 1429 break; 1430 #endif 1431 #ifdef INET6 1432 case AF_INET6: 1433 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1434 if (use_tso) { 1435 m->m_pkthdr.segsz = txsegsize; 1436 m->m_pkthdr.csum_flags = M_CSUM_TSOv6; 1437 } else { 1438 m->m_pkthdr.csum_flags = M_CSUM_TCPv6; 1439 if (len + optlen) { 1440 /* Fixup the pseudo-header checksum. */ 1441 /* XXXJRT: Not IPv6 Jumbogram safe. */ 1442 th->th_sum = in_cksum_addword(th->th_sum, 1443 htons((u_int16_t) (len + optlen))); 1444 } 1445 } 1446 break; 1447 #endif 1448 } 1449 1450 /* 1451 * In transmit state, time the transmission and arrange for 1452 * the retransmit. In persist state, just set snd_max. 
1453 */ 1454 if (tp->t_force == 0 || TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0) { 1455 tcp_seq startseq = tp->snd_nxt; 1456 1457 /* 1458 * Advance snd_nxt over sequence space of this segment. 1459 * There are no states in which we send both a SYN and a FIN, 1460 * so we collapse the tests for these flags. 1461 */ 1462 if (flags & (TH_SYN|TH_FIN)) 1463 tp->snd_nxt++; 1464 if (sack_rxmit) 1465 goto timer; 1466 tp->snd_nxt += len; 1467 if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { 1468 tp->snd_max = tp->snd_nxt; 1469 /* 1470 * Time this transmission if not a retransmission and 1471 * not currently timing anything. 1472 */ 1473 if (tp->t_rtttime == 0) { 1474 tp->t_rtttime = tcp_now; 1475 tp->t_rtseq = startseq; 1476 tcpstat.tcps_segstimed++; 1477 } 1478 } 1479 1480 /* 1481 * Set retransmit timer if not currently set, 1482 * and not doing an ack or a keep-alive probe. 1483 * Initial value for retransmit timer is smoothed 1484 * round-trip time + 2 * round-trip time variance. 1485 * Initialize shift counter which is used for backoff 1486 * of retransmit time. 1487 */ 1488 timer: 1489 if (TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0 && 1490 ((sack_rxmit && tp->snd_nxt != tp->snd_max) || 1491 tp->snd_nxt != tp->snd_una)) { 1492 if (TCP_TIMER_ISARMED(tp, TCPT_PERSIST)) { 1493 TCP_TIMER_DISARM(tp, TCPT_PERSIST); 1494 tp->t_rxtshift = 0; 1495 } 1496 TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur); 1497 } 1498 } else 1499 if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) 1500 tp->snd_max = tp->snd_nxt + len; 1501 1502 #ifdef TCP_DEBUG 1503 /* 1504 * Trace. 1505 */ 1506 if (so->so_options & SO_DEBUG) 1507 tcp_trace(TA_OUTPUT, tp->t_state, tp, m, 0); 1508 #endif 1509 1510 /* 1511 * Fill in IP length and desired time to live and 1512 * send to IP level. There should be a better way 1513 * to handle ttl and tos; we could keep them in 1514 * the template, but need a way to checksum without them. 
1515 */ 1516 m->m_pkthdr.len = hdrlen + len; 1517 1518 switch (af) { 1519 #ifdef INET 1520 case AF_INET: 1521 ip->ip_len = htons(m->m_pkthdr.len); 1522 packetlen = m->m_pkthdr.len; 1523 if (tp->t_inpcb) { 1524 ip->ip_ttl = tp->t_inpcb->inp_ip.ip_ttl; 1525 ip->ip_tos = tp->t_inpcb->inp_ip.ip_tos; 1526 } 1527 #ifdef INET6 1528 else if (tp->t_in6pcb) { 1529 ip->ip_ttl = in6_selecthlim(tp->t_in6pcb, NULL); /*XXX*/ 1530 ip->ip_tos = 0; /*XXX*/ 1531 } 1532 #endif 1533 break; 1534 #endif 1535 #ifdef INET6 1536 case AF_INET6: 1537 packetlen = m->m_pkthdr.len; 1538 ip6->ip6_nxt = IPPROTO_TCP; 1539 if (tp->t_in6pcb) { 1540 /* 1541 * we separately set hoplimit for every segment, since 1542 * the user might want to change the value via 1543 * setsockopt. Also, desired default hop limit might 1544 * be changed via Neighbor Discovery. 1545 */ 1546 ip6->ip6_hlim = in6_selecthlim(tp->t_in6pcb, 1547 ro->ro_rt ? ro->ro_rt->rt_ifp : NULL); 1548 } 1549 /* ip6->ip6_flow = ??? */ 1550 /* ip6_plen will be filled in ip6_output(). */ 1551 break; 1552 #endif 1553 default: /*pacify gcc*/ 1554 packetlen = 0; 1555 break; 1556 } 1557 1558 switch (af) { 1559 #ifdef INET 1560 case AF_INET: 1561 { 1562 struct mbuf *opts; 1563 1564 if (tp->t_inpcb) 1565 opts = tp->t_inpcb->inp_options; 1566 else 1567 opts = NULL; 1568 error = ip_output(m, opts, ro, 1569 (tp->t_mtudisc ? 
IP_MTUDISC : 0) | 1570 (so->so_options & SO_DONTROUTE), 1571 (struct ip_moptions *)0, so); 1572 break; 1573 } 1574 #endif 1575 #ifdef INET6 1576 case AF_INET6: 1577 { 1578 struct ip6_pktopts *opts; 1579 1580 if (tp->t_in6pcb) 1581 opts = tp->t_in6pcb->in6p_outputopts; 1582 else 1583 opts = NULL; 1584 error = ip6_output(m, opts, (struct route_in6 *)ro, 1585 so->so_options & SO_DONTROUTE, 1586 (struct ip6_moptions *)0, so, NULL); 1587 break; 1588 } 1589 #endif 1590 default: 1591 error = EAFNOSUPPORT; 1592 break; 1593 } 1594 if (error) { 1595 out: 1596 if (error == ENOBUFS) { 1597 tcpstat.tcps_selfquench++; 1598 #ifdef INET 1599 if (tp->t_inpcb) 1600 tcp_quench(tp->t_inpcb, 0); 1601 #endif 1602 #ifdef INET6 1603 if (tp->t_in6pcb) 1604 tcp6_quench(tp->t_in6pcb, 0); 1605 #endif 1606 error = 0; 1607 } else if ((error == EHOSTUNREACH || error == ENETDOWN) && 1608 TCPS_HAVERCVDSYN(tp->t_state)) { 1609 tp->t_softerror = error; 1610 error = 0; 1611 } 1612 1613 /* Back out the seqence number advance. */ 1614 if (sack_rxmit) 1615 p->rxmit -= len; 1616 1617 /* Restart the delayed ACK timer, if necessary. */ 1618 if (tp->t_flags & TF_DELACK) 1619 TCP_RESTART_DELACK(tp); 1620 1621 return (error); 1622 } 1623 1624 if (packetlen > tp->t_pmtud_mtu_sent) 1625 tp->t_pmtud_mtu_sent = packetlen; 1626 1627 tcpstat.tcps_sndtotal++; 1628 if (tp->t_flags & TF_DELACK) 1629 tcpstat.tcps_delack++; 1630 1631 /* 1632 * Data sent (as far as we can tell). 1633 * If this advertises a larger window than any other segment, 1634 * then remember the size of the advertised window. 1635 * Any pending ACK has now been sent. 
1636 */ 1637 if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) 1638 tp->rcv_adv = tp->rcv_nxt + win; 1639 tp->last_ack_sent = tp->rcv_nxt; 1640 tp->t_flags &= ~TF_ACKNOW; 1641 TCP_CLEAR_DELACK(tp); 1642 #ifdef DIAGNOSTIC 1643 if (maxburst < 0) 1644 printf("tcp_output: maxburst exceeded by %d\n", -maxburst); 1645 #endif 1646 if (sendalot && (tp->t_congctl == &tcp_reno_ctl || --maxburst)) 1647 goto again; 1648 return (0); 1649 } 1650 /* tcp_setpersist: arm the persist timer (TCPT_PERSIST) for tp. The base interval is ((t_srtt >> 2) + t_rttvar) >> 3, raised to t_rttmin when smaller, then multiplied by tcp_backoff[t_rxtshift] and handed to TCPT_RANGESET together with TCPTV_PERSMIN and TCPTV_PERSMAX (presumably clamping the result into that range; the macro definition is not visible here -- confirm). Afterwards t_rxtshift is incremented unless it has already reached TCP_MAXRXTSHIFT. Panics if the retransmit timer is armed on entry. */ 1651 void 1652 tcp_setpersist(struct tcpcb *tp) 1653 { 1654 int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> (1 + 2); 1655 int nticks; 1656 1657 if (TCP_TIMER_ISARMED(tp, TCPT_REXMT)) 1658 panic("tcp_output REXMT"); 1659 /* 1660 * Start/restart persistence timer. 1661 */ 1662 if (t < tp->t_rttmin) 1663 t = tp->t_rttmin; 1664 TCPT_RANGESET(nticks, t * tcp_backoff[tp->t_rxtshift], 1665 TCPTV_PERSMIN, TCPTV_PERSMAX); 1666 TCP_TIMER_ARM(tp, TCPT_PERSIST, nticks); /* step to the next tcp_backoff[] entry for the following call, stopping at TCP_MAXRXTSHIFT */ 1667 if (tp->t_rxtshift < TCP_MAXRXTSHIFT) 1668 tp->t_rxtshift++; 1669 } 1670 1671 #if defined(INET) 1672 /* 1673 * tcp4_segment: handle M_CSUM_TSOv4 by software. 1674 * 1675 * => always consume m. 1676 * => call output_func with output_arg for each segments.
1677 */ 1678 1679 int 1680 tcp4_segment(struct mbuf *m, int (*output_func)(void *, struct mbuf *), 1681 void *output_arg) 1682 { 1683 int mss; 1684 int iphlen; 1685 int thlen; 1686 int hlen; 1687 int len; 1688 struct ip *iph; 1689 struct tcphdr *th; 1690 uint16_t ipid; 1691 uint32_t tcpseq; 1692 struct mbuf *hdr = NULL; 1693 struct mbuf *t; 1694 int error = 0; 1695 1696 KASSERT((m->m_flags & M_PKTHDR) != 0); 1697 KASSERT((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) != 0); 1698 1699 m->m_pkthdr.csum_flags = 0; 1700 1701 len = m->m_pkthdr.len; 1702 KASSERT(len >= sizeof(*iph) + sizeof(*th)); 1703 1704 if (m->m_len < sizeof(*iph)) { 1705 m = m_pullup(m, sizeof(*iph)); 1706 if (m == NULL) { 1707 error = ENOMEM; 1708 goto quit; 1709 } 1710 } 1711 iph = mtod(m, struct ip *); 1712 iphlen = iph->ip_hl * 4; 1713 KASSERT(iph->ip_v == IPVERSION); 1714 KASSERT(iphlen >= sizeof(*iph)); 1715 KASSERT(iph->ip_p == IPPROTO_TCP); 1716 ipid = ntohs(iph->ip_id); 1717 1718 hlen = iphlen + sizeof(*th); 1719 if (m->m_len < hlen) { 1720 m = m_pullup(m, hlen); 1721 if (m == NULL) { 1722 error = ENOMEM; 1723 goto quit; 1724 } 1725 } 1726 th = (void *)(mtod(m, char *) + iphlen); 1727 tcpseq = ntohl(th->th_seq); 1728 thlen = th->th_off * 4; 1729 hlen = iphlen + thlen; 1730 1731 mss = m->m_pkthdr.segsz; 1732 KASSERT(mss != 0); 1733 KASSERT(len > hlen); 1734 1735 t = m_split(m, hlen, M_NOWAIT); 1736 if (t == NULL) { 1737 error = ENOMEM; 1738 goto quit; 1739 } 1740 hdr = m; 1741 m = t; 1742 len -= hlen; 1743 KASSERT(len % mss == 0); 1744 while (len > 0) { 1745 struct mbuf *n; 1746 1747 n = m_dup(hdr, 0, hlen, M_NOWAIT); 1748 if (n == NULL) { 1749 error = ENOMEM; 1750 goto quit; 1751 } 1752 KASSERT(n->m_len == hlen); /* XXX */ 1753 1754 t = m_split(m, mss, M_NOWAIT); 1755 if (t == NULL) { 1756 m_freem(n); 1757 error = ENOMEM; 1758 goto quit; 1759 } 1760 m_cat(n, m); 1761 m = t; 1762 1763 KASSERT(n->m_len >= hlen); /* XXX */ 1764 1765 n->m_pkthdr.len = hlen + mss; 1766 iph = mtod(n, struct ip *); 1767 
KASSERT(iph->ip_v == IPVERSION); 1768 iph->ip_len = htons(n->m_pkthdr.len); 1769 iph->ip_id = htons(ipid); 1770 th = (void *)(mtod(n, char *) + iphlen); 1771 th->th_seq = htonl(tcpseq); 1772 iph->ip_sum = 0; 1773 iph->ip_sum = in_cksum(n, iphlen); 1774 th->th_sum = 0; 1775 th->th_sum = in4_cksum(n, IPPROTO_TCP, iphlen, thlen + mss); 1776 1777 error = (*output_func)(output_arg, n); 1778 if (error) { 1779 goto quit; 1780 } 1781 1782 tcpseq += mss; 1783 ipid++; 1784 len -= mss; 1785 } 1786 1787 quit: 1788 if (hdr != NULL) { 1789 m_freem(hdr); 1790 } 1791 if (m != NULL) { 1792 m_freem(m); 1793 } 1794 1795 return error; 1796 } 1797 #endif /* defined(INET) */ 1798 1799 #if defined(INET6) 1800 /* 1801 * tcp6_segment: handle M_CSUM_TSOv6 by software. 1802 * 1803 * => always consume m. 1804 * => call output_func with output_arg for each segments. 1805 */ 1806 1807 int 1808 tcp6_segment(struct mbuf *m, int (*output_func)(void *, struct mbuf *), 1809 void *output_arg) 1810 { 1811 int mss; 1812 int iphlen; 1813 int thlen; 1814 int hlen; 1815 int len; 1816 struct ip6_hdr *iph; 1817 struct tcphdr *th; 1818 uint32_t tcpseq; 1819 struct mbuf *hdr = NULL; 1820 struct mbuf *t; 1821 int error = 0; 1822 1823 KASSERT((m->m_flags & M_PKTHDR) != 0); 1824 KASSERT((m->m_pkthdr.csum_flags & M_CSUM_TSOv6) != 0); 1825 1826 m->m_pkthdr.csum_flags = 0; 1827 1828 len = m->m_pkthdr.len; 1829 KASSERT(len >= sizeof(*iph) + sizeof(*th)); 1830 1831 if (m->m_len < sizeof(*iph)) { 1832 m = m_pullup(m, sizeof(*iph)); 1833 if (m == NULL) { 1834 error = ENOMEM; 1835 goto quit; 1836 } 1837 } 1838 iph = mtod(m, struct ip6_hdr *); 1839 iphlen = sizeof(*iph); 1840 KASSERT((iph->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION); 1841 KASSERT(iph->ip6_nxt == IPPROTO_TCP); 1842 1843 hlen = iphlen + sizeof(*th); 1844 if (m->m_len < hlen) { 1845 m = m_pullup(m, hlen); 1846 if (m == NULL) { 1847 error = ENOMEM; 1848 goto quit; 1849 } 1850 } 1851 th = (void *)(mtod(m, char *) + iphlen); 1852 tcpseq = ntohl(th->th_seq); 
1853 thlen = th->th_off * 4; 1854 hlen = iphlen + thlen; 1855 1856 mss = m->m_pkthdr.segsz; 1857 KASSERT(mss != 0); 1858 KASSERT(len > hlen); 1859 1860 t = m_split(m, hlen, M_NOWAIT); 1861 if (t == NULL) { 1862 error = ENOMEM; 1863 goto quit; 1864 } 1865 hdr = m; 1866 m = t; 1867 len -= hlen; 1868 KASSERT(len % mss == 0); 1869 while (len > 0) { 1870 struct mbuf *n; 1871 1872 n = m_dup(hdr, 0, hlen, M_NOWAIT); 1873 if (n == NULL) { 1874 error = ENOMEM; 1875 goto quit; 1876 } 1877 KASSERT(n->m_len == hlen); /* XXX */ 1878 1879 t = m_split(m, mss, M_NOWAIT); 1880 if (t == NULL) { 1881 m_freem(n); 1882 error = ENOMEM; 1883 goto quit; 1884 } 1885 m_cat(n, m); 1886 m = t; 1887 1888 KASSERT(n->m_len >= hlen); /* XXX */ 1889 1890 n->m_pkthdr.len = hlen + mss; 1891 iph = mtod(n, struct ip6_hdr *); 1892 KASSERT((iph->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION); 1893 iph->ip6_plen = htons(thlen + mss); 1894 th = (void *)(mtod(n, char *) + iphlen); 1895 th->th_seq = htonl(tcpseq); 1896 th->th_sum = 0; 1897 th->th_sum = in6_cksum(n, IPPROTO_TCP, iphlen, thlen + mss); 1898 1899 error = (*output_func)(output_arg, n); 1900 if (error) { 1901 goto quit; 1902 } 1903 1904 tcpseq += mss; 1905 len -= mss; 1906 } 1907 1908 quit: 1909 if (hdr != NULL) { 1910 m_freem(hdr); 1911 } 1912 if (m != NULL) { 1913 m_freem(m); 1914 } 1915 1916 return error; 1917 } 1918 #endif /* defined(INET6) */ 1919