/*	$NetBSD: tcp_subr.c,v 1.47 1998/04/13 21:18:19 kml Exp $	*/

/*-
 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
 * Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tcp_subr.c	8.2 (Berkeley) 5/24/95
 */

#include "opt_tcp_compat_42.h"
#include "rnd.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#if NRND > 0
#include <sys/rnd.h>
#endif

#include <net/route.h>
#include <net/if.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>

/* patchable/settable parameters for tcp */
int     tcp_mssdflt = TCP_MSS;
int     tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
int     tcp_do_rfc1323 = 1;
int     tcp_init_win = 1;
int     tcp_mss_ifmtu = 0;

#ifndef TCBHASHSIZE
#define TCBHASHSIZE     128
#endif
int     tcbhashsize = TCBHASHSIZE;

int     tcp_freeq __P((struct tcpcb *));

/*
 * Tcp initialization
 */
void
tcp_init()
{

        in_pcbinit(&tcbtable, tcbhashsize, tcbhashsize);
        LIST_INIT(&tcp_delacks);
        if (max_protohdr < sizeof(struct tcpiphdr))
                max_protohdr = sizeof(struct tcpiphdr);
        if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN)
                panic("tcp_init");
}

/*
 * Create template to be used to send tcp packets on a connection.
 * Call after host entry created, allocates an mbuf and fills
 * in a skeletal tcp/ip header, minimizing the amount of work
 * necessary when the connection is used.
 */
struct tcpiphdr *
tcp_template(tp)
        struct tcpcb *tp;
{
        register struct inpcb *inp = tp->t_inpcb;
        register struct tcpiphdr *n;

        if ((n = tp->t_template) == 0) {
                MALLOC(n, struct tcpiphdr *, sizeof (struct tcpiphdr),
                    M_MBUF, M_NOWAIT);
                if (n == NULL)
                        return (0);
        }
        bzero(n->ti_x1, sizeof n->ti_x1);
        n->ti_pr = IPPROTO_TCP;
        n->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip));
        n->ti_src = inp->inp_laddr;
        n->ti_dst = inp->inp_faddr;
        n->ti_sport = inp->inp_lport;
        n->ti_dport = inp->inp_fport;
        n->ti_seq = 0;
        n->ti_ack = 0;
        n->ti_x2 = 0;
        n->ti_off = 5;
        n->ti_flags = 0;
        n->ti_win = 0;
        n->ti_sum = 0;
        n->ti_urp = 0;
        return (n);
}
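
/*
 * Note: the template is consumed elsewhere -- see tcp_output(), which
 * copies it into the header of each outgoing segment, and the keep-alive
 * path in tcp_timer(), which hands it to tcp_respond() below.  Only the
 * per-connection fields (addresses, ports, protocol) are meaningful
 * here; the per-segment fields are simply zeroed and filled in at send
 * time.
 */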

/*
 * Send a single message to the TCP at address specified by
 * the given TCP/IP header.  If m == 0, then we make a copy
 * of the tcpiphdr at ti and send directly to the addressed host.
 * This is used to force keep alive messages out using the TCP
 * template for a connection tp->t_template.  If flags are given
 * then we send a message back to the TCP which originated the
 * segment ti, and discard the mbuf containing it and any other
 * attached mbufs.
 *
 * In any case the ack and sequence number of the transmitted
 * segment are as specified by the parameters.
 */
int
tcp_respond(tp, ti, m, ack, seq, flags)
        struct tcpcb *tp;
        register struct tcpiphdr *ti;
        register struct mbuf *m;
        tcp_seq ack, seq;
        int flags;
{
        register int tlen;
        int win = 0;
        struct route *ro = 0;

        if (tp) {
                win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
                ro = &tp->t_inpcb->inp_route;
        }
        if (m == 0) {
                m = m_gethdr(M_DONTWAIT, MT_HEADER);
                if (m == NULL)
                        return (ENOBUFS);
#ifdef TCP_COMPAT_42
                tlen = 1;
#else
                tlen = 0;
#endif
                m->m_data += max_linkhdr;
                *mtod(m, struct tcpiphdr *) = *ti;
                ti = mtod(m, struct tcpiphdr *);
                flags = TH_ACK;
        } else {
                m_freem(m->m_next);
                m->m_next = 0;
                m->m_data = (caddr_t)ti;
                m->m_len = sizeof (struct tcpiphdr);
                tlen = 0;
#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
                xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_int32_t);
                xchg(ti->ti_dport, ti->ti_sport, u_int16_t);
#undef xchg
        }
        bzero(ti->ti_x1, sizeof ti->ti_x1);
        ti->ti_seq = htonl(seq);
        ti->ti_ack = htonl(ack);
        ti->ti_x2 = 0;
        if ((flags & TH_SYN) == 0) {
                if (tp)
                        ti->ti_win = htons((u_int16_t) (win >> tp->rcv_scale));
                else
                        ti->ti_win = htons((u_int16_t)win);
                ti->ti_off = sizeof (struct tcphdr) >> 2;
                tlen += sizeof (struct tcphdr);
        } else
                tlen += ti->ti_off << 2;
        ti->ti_len = htons((u_int16_t)tlen);
        tlen += sizeof (struct ip);
        m->m_len = tlen;
        m->m_pkthdr.len = tlen;
        m->m_pkthdr.rcvif = (struct ifnet *) 0;
        ti->ti_flags = flags;
        ti->ti_urp = 0;
        ti->ti_sum = 0;
        ti->ti_sum = in_cksum(m, tlen);
        ((struct ip *)ti)->ip_len = tlen;
        ((struct ip *)ti)->ip_ttl = ip_defttl;
        return ip_output(m, NULL, ro, 0, NULL);
}

/*
 * Create a new TCP control block, making an
 * empty reassembly queue and hooking it to the argument
 * protocol control block.
 */
struct tcpcb *
tcp_newtcpcb(inp)
        struct inpcb *inp;
{
        register struct tcpcb *tp;

        tp = malloc(sizeof(*tp), M_PCB, M_NOWAIT);
        if (tp == NULL)
                return ((struct tcpcb *)0);
        bzero((caddr_t)tp, sizeof(struct tcpcb));
        LIST_INIT(&tp->segq);
        tp->t_peermss = tcp_mssdflt;
        tp->t_ourmss = tcp_mssdflt;
        tp->t_segsz = tcp_mssdflt;

        tp->t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
        tp->t_inpcb = inp;
        /*
         * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
         * rtt estimate.  Set rttvar so that srtt + 2 * rttvar gives
         * reasonable initial retransmit time.
         */
        tp->t_srtt = TCPTV_SRTTBASE;
        tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << (TCP_RTTVAR_SHIFT + 2 - 1);
        tp->t_rttmin = TCPTV_MIN;
        TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
            TCPTV_MIN, TCPTV_REXMTMAX);
        tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
        tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
        inp->inp_ip.ip_ttl = ip_defttl;
        inp->inp_ppcb = (caddr_t)tp;
        return (tp);
}
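
/*
 * Note on the defaults chosen above: with no RTT samples yet, t_rttvar
 * is seeded from tcp_rttdflt so that TCP_REXMTVAL() produces a sanely
 * conservative first retransmit timeout, and snd_cwnd/snd_ssthresh
 * start at the largest representable window.  Both are tightened later,
 * once the real segment size and any cached route metrics are known
 * (see tcp_mss_from_peer() below and TCP_INITIAL_WINDOW()).
 */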

/*
 * Drop a TCP connection, reporting
 * the specified error.  If connection is synchronized,
 * then send a RST to peer.
 */
struct tcpcb *
tcp_drop(tp, errno)
        register struct tcpcb *tp;
        int errno;
{
        struct socket *so = tp->t_inpcb->inp_socket;

        if (TCPS_HAVERCVDSYN(tp->t_state)) {
                tp->t_state = TCPS_CLOSED;
                (void) tcp_output(tp);
                tcpstat.tcps_drops++;
        } else
                tcpstat.tcps_conndrops++;
        if (errno == ETIMEDOUT && tp->t_softerror)
                errno = tp->t_softerror;
        so->so_error = errno;
        return (tcp_close(tp));
}
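
/*
 * The t_softerror consulted above is recorded by tcp_notify() when an
 * ICMP error (e.g. host or net unreachable) arrives for the connection;
 * substituting it for ETIMEDOUT reports the more specific cause of the
 * failure to the application.
 */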

/*
 * Close a TCP control block:
 *	discard all space held by the tcp
 *	discard internet protocol block
 *	wake up any sleepers
 */
struct tcpcb *
tcp_close(tp)
        register struct tcpcb *tp;
{
        struct inpcb *inp = tp->t_inpcb;
        struct socket *so = inp->inp_socket;
#ifdef RTV_RTT
        register struct rtentry *rt;

        /*
         * If we sent enough data to get some meaningful characteristics,
         * save them in the routing entry.  'Enough' is arbitrarily
         * defined as the sendpipesize (default 4K) * 16.  This would
         * give us 16 rtt samples assuming we only get one sample per
         * window (the usual case on a long haul net).  16 samples is
         * enough for the srtt filter to converge to within 5% of the correct
         * value; fewer samples and we could save a very bogus rtt.
         *
         * Don't update the default route's characteristics and don't
         * update anything that the user "locked".
         */
        if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
            (rt = inp->inp_route.ro_rt) &&
            !in_nullhost(satosin(rt_key(rt))->sin_addr)) {
                register u_long i = 0;

                if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
                        i = tp->t_srtt *
                            ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTT_SHIFT + 2));
                        if (rt->rt_rmx.rmx_rtt && i)
                                /*
                                 * filter this update to half the old & half
                                 * the new values, converting scale.
                                 * See route.h and tcp_var.h for a
                                 * description of the scaling constants.
                                 */
                                rt->rt_rmx.rmx_rtt =
                                    (rt->rt_rmx.rmx_rtt + i) / 2;
                        else
                                rt->rt_rmx.rmx_rtt = i;
                }
                if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
                        i = tp->t_rttvar *
                            ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTTVAR_SHIFT + 2));
                        if (rt->rt_rmx.rmx_rttvar && i)
                                rt->rt_rmx.rmx_rttvar =
                                    (rt->rt_rmx.rmx_rttvar + i) / 2;
                        else
                                rt->rt_rmx.rmx_rttvar = i;
                }
                /*
                 * update the pipelimit (ssthresh) if it has been updated
                 * already or if a pipesize was specified & the threshold
                 * got below half the pipesize.  I.e., wait for bad news
                 * before we start updating, then update on both good
                 * and bad news.
                 */
                if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
                    (i = tp->snd_ssthresh) && rt->rt_rmx.rmx_ssthresh) ||
                    i < (rt->rt_rmx.rmx_sendpipe / 2)) {
                        /*
                         * convert the limit from user data bytes to
                         * packets then to packet data bytes.
                         */
                        i = (i + tp->t_segsz / 2) / tp->t_segsz;
                        if (i < 2)
                                i = 2;
                        i *= (u_long)(tp->t_segsz + sizeof (struct tcpiphdr));
                        if (rt->rt_rmx.rmx_ssthresh)
                                rt->rt_rmx.rmx_ssthresh =
                                    (rt->rt_rmx.rmx_ssthresh + i) / 2;
                        else
                                rt->rt_rmx.rmx_ssthresh = i;
                }
        }
#endif /* RTV_RTT */
        /* free the reassembly queue, if any */
        (void) tcp_freeq(tp);
        TCP_CLEAR_DELACK(tp);

        if (tp->t_template)
                FREE(tp->t_template, M_MBUF);
        free(tp, M_PCB);
        inp->inp_ppcb = 0;
        soisdisconnected(so);
        in_pcbdetach(inp);
        tcpstat.tcps_closed++;
        return ((struct tcpcb *)0);
}

int
tcp_freeq(tp)
        struct tcpcb *tp;
{
        register struct ipqent *qe;
        int rv = 0;

        while ((qe = tp->segq.lh_first) != NULL) {
                LIST_REMOVE(qe, ipqe_q);
                m_freem(qe->ipqe_m);
                FREE(qe, M_IPQ);
                rv = 1;
        }
        return (rv);
}

/*
 * Protocol drain routine.  Called when memory is in short supply.
 */
void
tcp_drain()
{
        register struct inpcb *inp;
        register struct tcpcb *tp;

        /*
         * Free the sequence queue of all TCP connections.
         */
        inp = tcbtable.inpt_queue.cqh_first;
        if (inp)                                        /* XXX */
        for (; inp != (struct inpcb *)&tcbtable.inpt_queue;
            inp = inp->inp_queue.cqe_next) {
                if ((tp = intotcpcb(inp)) != NULL) {
                        if (tcp_freeq(tp))
                                tcpstat.tcps_connsdrained++;
                }
        }
}
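
/*
 * Draining discards only out-of-order segments being held for
 * reassembly; no connection state is lost, and the peer will
 * retransmit whatever is dropped here.
 */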

/*
 * Notify a tcp user of an asynchronous error;
 * store error as soft error, but wake up user
 * (for now, won't do anything until can select for soft error).
 */
void
tcp_notify(inp, error)
        struct inpcb *inp;
        int error;
{
        register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
        register struct socket *so = inp->inp_socket;

        /*
         * Ignore some errors if we are hooked up.
         * If connection hasn't completed, has retransmitted several times,
         * and receives a second error, give up now.  This is better
         * than waiting a long time to establish a connection that
         * can never complete.
         */
        if (tp->t_state == TCPS_ESTABLISHED &&
            (error == EHOSTUNREACH || error == ENETUNREACH ||
             error == EHOSTDOWN)) {
                return;
        } else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 &&
            tp->t_rxtshift > 3 && tp->t_softerror)
                so->so_error = error;
        else
                tp->t_softerror = error;
        wakeup((caddr_t) &so->so_timeo);
        sorwakeup(so);
        sowwakeup(so);
}

void *
tcp_ctlinput(cmd, sa, v)
        int cmd;
        struct sockaddr *sa;
        register void *v;
{
        register struct ip *ip = v;
        register struct tcphdr *th;
        extern int inetctlerrmap[];
        void (*notify) __P((struct inpcb *, int)) = tcp_notify;
        int errno;
        int nmatch;

        if ((unsigned)cmd >= PRC_NCMDS)
                return NULL;
        errno = inetctlerrmap[cmd];
        if (cmd == PRC_QUENCH)
                notify = tcp_quench;
        else if (PRC_IS_REDIRECT(cmd))
                notify = in_rtchange, ip = 0;
        else if (cmd == PRC_MSGSIZE && ip_mtudisc)
                notify = tcp_mtudisc, ip = 0;
        else if (cmd == PRC_HOSTDEAD)
                ip = 0;
        else if (errno == 0)
                return NULL;
        if (ip) {
                th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
                nmatch = in_pcbnotify(&tcbtable, satosin(sa)->sin_addr,
                    th->th_dport, ip->ip_src, th->th_sport, errno, notify);
                if (nmatch == 0 && syn_cache_count &&
                    (inetctlerrmap[cmd] == EHOSTUNREACH ||
                    inetctlerrmap[cmd] == ENETUNREACH ||
                    inetctlerrmap[cmd] == EHOSTDOWN))
                        syn_cache_unreach(ip, th);
        } else
                (void)in_pcbnotifyall(&tcbtable, satosin(sa)->sin_addr, errno,
                    notify);
        return NULL;
}

/*
 * When a source quench is received, close congestion window
 * to one segment.  We will gradually open it again as we proceed.
 */
void
tcp_quench(inp, errno)
        struct inpcb *inp;
        int errno;
{
        struct tcpcb *tp = intotcpcb(inp);

        if (tp)
                tp->snd_cwnd = TCP_INITIAL_WINDOW(1, tp->t_segsz);
}

/*
 * On receipt of path MTU corrections, flush old route and replace it
 * with the new one.  Retransmit all unacknowledged packets, to ensure
 * that all packets will be received.
 */
void
tcp_mtudisc(inp, errno)
        struct inpcb *inp;
        int errno;
{
        struct tcpcb *tp = intotcpcb(inp);
        struct rtentry *rt = in_pcbrtentry(inp);

        if (tp != 0) {
                if (rt != 0) {
                        /*
                         * If this was not a host route, remove and realloc.
                         */
                        if ((rt->rt_flags & RTF_HOST) == 0) {
                                in_rtchange(inp, errno);
                                if ((rt = in_pcbrtentry(inp)) == 0)
                                        return;
                        }

                        /*
                         * Slow start out of the error condition.  We
                         * use the MTU because we know it's smaller
                         * than the previously transmitted segment.
                         */
                        if (rt->rt_rmx.rmx_mtu != 0)
                                tp->snd_cwnd =
                                    TCP_INITIAL_WINDOW(tcp_init_win,
                                    rt->rt_rmx.rmx_mtu);
                }

                /*
                 * Resend unacknowledged packets.
                 */
                tp->snd_nxt = tp->snd_una;
                tcp_output(tp);
        }
}
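
/*
 * Note: tcp_mtudisc() is reached via tcp_ctlinput() on PRC_MSGSIZE and
 * only when ip_mtudisc is enabled.  The new path MTU is not passed in
 * here; it is assumed to have been recorded in the host route's rmx_mtu
 * by the ICMP layer, which is why only the route is consulted above.
 */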

/*
 * Compute the MSS to advertise to the peer.  Called only during
 * the 3-way handshake.  If we are the server (peer initiated
 * connection), we are called with the TCPCB for the listen
 * socket.  If we are the client (we initiated connection), we
 * are called with the TCPCB for the actual connection.
 */
u_long
tcp_mss_to_advertise(ifp)
        const struct ifnet *ifp;
{
        extern u_long in_maxmtu;
        u_long mss = 0;

        /*
         * In order to avoid defeating path MTU discovery on the peer,
         * we advertise the max MTU of all attached networks as our MSS,
         * per RFC 1191, section 3.1.
         *
         * We provide the option to advertise just the MTU of
         * the interface on which we hope this connection will
         * be receiving.  If we are responding to a SYN, we
         * will have a pretty good idea about this, but when
         * initiating a connection there is a bit more doubt.
         *
         * We also need to ensure that loopback has a large enough
         * MSS, as the loopback MTU is never included in in_maxmtu.
         */

        if (ifp != NULL)
                mss = ifp->if_mtu;

        if (tcp_mss_ifmtu == 0)
                mss = max(in_maxmtu, mss);

        if (mss > sizeof(struct tcpiphdr))
                mss -= sizeof(struct tcpiphdr);

        mss = max(tcp_mssdflt, mss);
        return (mss);
}
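
/*
 * For example, with tcp_mss_ifmtu left at 0 and an Ethernet (MTU 1500)
 * as the largest attached network, the advertised MSS works out to
 * 1500 less the 40-byte TCP/IP header, i.e. 1460.  tcp_mssdflt serves
 * only as a floor for the degenerate case where no usable MTU is found.
 */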

/*
 * Set connection variables based on the peer's advertised MSS.
 * We are passed the TCPCB for the actual connection.  If we
 * are the server, we are called by the compressed state engine
 * when the 3-way handshake is complete.  If we are the client,
 * we are called when we receive the SYN,ACK from the server.
 *
 * NOTE: Our advertised MSS value must be initialized in the TCPCB
 * before this routine is called!
 */
void
tcp_mss_from_peer(tp, offer)
        struct tcpcb *tp;
        int offer;
{
        struct inpcb *inp = tp->t_inpcb;
        struct socket *so = inp->inp_socket;
#if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
        struct rtentry *rt = in_pcbrtentry(inp);
#endif
        u_long bufsize;
        int mss;

        /*
         * As per RFC1122, use the default MSS value, unless they
         * sent us an offer.  Do not accept offers less than 32 bytes.
         */
        mss = tcp_mssdflt;
        if (offer)
                mss = offer;
        mss = max(mss, 32);             /* sanity */
        mss -= (tcp_optlen(tp) + ip_optlen(tp->t_inpcb));

        /*
         * If there's a pipesize, change the socket buffer to that size.
         * Make the socket buffer an integral number of MSS units.  If
         * the MSS is larger than the socket buffer, artificially decrease
         * the MSS.
         */
#ifdef RTV_SPIPE
        if (rt != NULL && rt->rt_rmx.rmx_sendpipe != 0)
                bufsize = rt->rt_rmx.rmx_sendpipe;
        else
#endif
                bufsize = so->so_snd.sb_hiwat;
        if (bufsize < mss)
                mss = bufsize;
        else {
                bufsize = roundup(bufsize, mss);
                if (bufsize > sb_max)
                        bufsize = sb_max;
                (void) sbreserve(&so->so_snd, bufsize);
        }
        tp->t_peermss = mss;
        tp->t_segsz = mss;

#ifdef RTV_SSTHRESH
        if (rt != NULL && rt->rt_rmx.rmx_ssthresh) {
                /*
                 * There's some sort of gateway or interface buffer
                 * limit on the path.  Use this to set the slow
                 * start threshold, but set the threshold to no less
                 * than 2 * MSS.
                 */
                tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
        }
#endif
}

/*
 * Processing necessary when a TCP connection is established.
 */
void
tcp_established(tp)
        struct tcpcb *tp;
{
        struct inpcb *inp = tp->t_inpcb;
        struct socket *so = inp->inp_socket;
#ifdef RTV_RPIPE
        struct rtentry *rt = in_pcbrtentry(inp);
#endif
        u_long bufsize;

        tp->t_state = TCPS_ESTABLISHED;
        tp->t_timer[TCPT_KEEP] = tcp_keepidle;

#ifdef RTV_RPIPE
        if (rt != NULL && rt->rt_rmx.rmx_recvpipe != 0)
                bufsize = rt->rt_rmx.rmx_recvpipe;
        else
#endif
                bufsize = so->so_rcv.sb_hiwat;
        if (bufsize > tp->t_ourmss) {
                bufsize = roundup(bufsize, tp->t_ourmss);
                if (bufsize > sb_max)
                        bufsize = sb_max;
                (void) sbreserve(&so->so_rcv, bufsize);
        }
}

/*
 * Check if there's an initial rtt or rttvar.  Convert from the
 * route-table units to scaled multiples of the slow timeout timer.
 * Called only during the 3-way handshake.
 */
void
tcp_rmx_rtt(tp)
        struct tcpcb *tp;
{
#ifdef RTV_RTT
        struct rtentry *rt;
        int rtt;

        if ((rt = in_pcbrtentry(tp->t_inpcb)) == NULL)
                return;

        if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
                /*
                 * XXX The lock bit for MTU indicates that the value
                 * is also a minimum value; this is subject to time.
                 */
                if (rt->rt_rmx.rmx_locks & RTV_RTT)
                        TCPT_RANGESET(tp->t_rttmin,
                            rtt / (RTM_RTTUNIT / PR_SLOWHZ),
                            TCPTV_MIN, TCPTV_REXMTMAX);
                tp->t_srtt = rtt /
                    ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTT_SHIFT + 2));
                if (rt->rt_rmx.rmx_rttvar) {
                        tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
                            ((RTM_RTTUNIT / PR_SLOWHZ) >>
                            (TCP_RTTVAR_SHIFT + 2));
                } else {
                        /* Default variation is +- 1 rtt */
                        tp->t_rttvar =
                            tp->t_srtt >> (TCP_RTT_SHIFT - TCP_RTTVAR_SHIFT);
                }
                TCPT_RANGESET(tp->t_rxtcur,
                    ((tp->t_srtt >> 2) + tp->t_rttvar) >> (1 + 2),
                    tp->t_rttmin, TCPTV_REXMTMAX);
        }
#endif
}
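
/*
 * tcp_rmx_rtt() is the consumer of the metrics that tcp_close() saves:
 * a new connection to a destination we have recently talked to starts
 * from the route's cached rmx_rtt/rmx_rttvar rather than the built-in
 * defaults, so its first retransmit timeout is already reasonable.
 */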

tcp_seq tcp_iss_seq = 0;        /* tcp initial seq # */

/*
 * Get a new sequence value given a tcp control block
 */
tcp_seq
tcp_new_iss(tp, len, addin)
        void *tp;
        u_long len;
        tcp_seq addin;
{
        tcp_seq tcp_iss;

        /*
         * add randomness about this connection, but do not estimate
         * entropy from the timing, since the physical device driver would
         * have done that for us.
         */
#if NRND > 0
        if (tp != NULL)
                rnd_add_data(NULL, tp, len, 0);
#endif

        /*
         * randomize.
         */
#if NRND > 0
        rnd_extract_data(&tcp_iss, sizeof(tcp_iss), RND_EXTRACT_ANY);
#else
        tcp_iss = random();
#endif

        /*
         * If we were asked to add some amount to a known value,
         * we will take a random value obtained above, mask off the upper
         * bits, and add in the known value.  We also add in a constant to
         * ensure that we are at least a certain distance from the original
         * value.
         *
         * This is used when an old connection is in timed wait
         * and we have a new one coming in, for instance.
         */
        if (addin != 0) {
#ifdef TCPISS_DEBUG
                printf("Random %08x, ", tcp_iss);
#endif
                tcp_iss &= TCP_ISS_RANDOM_MASK;
                tcp_iss = tcp_iss + addin + TCP_ISSINCR;
                tcp_iss_seq += TCP_ISSINCR;
                tcp_iss += tcp_iss_seq;
#ifdef TCPISS_DEBUG
                printf("Old ISS %08x, ISS %08x\n", addin, tcp_iss);
#endif
        } else {
                tcp_iss &= TCP_ISS_RANDOM_MASK;
                tcp_iss_seq += TCP_ISSINCR;
                tcp_iss += tcp_iss_seq;
#ifdef TCPISS_DEBUG
                printf("ISS %08x\n", tcp_iss);
#endif
        }

#ifdef TCP_COMPAT_42
        /*
         * limit it to the positive range for really old TCP implementations
         */
        if ((int)tcp_iss < 0)
                tcp_iss &= 0x7fffffff;          /* XXX */
#endif

        return tcp_iss;
}

/*
 * Determine the length of the TCP options for this connection.
 *
 * XXX:  What do we do for SACK, when we add that?  Just reserve
 *       all of the space?  Otherwise we can't exactly be incrementing
 *       cwnd by an amount that varies depending on the amount we last
 *       had to SACK!
 */

u_int
tcp_optlen(tp)
        struct tcpcb *tp;
{
        if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
            (TF_REQ_TSTMP | TF_RCVD_TSTMP))
                return TCPOLEN_TSTAMP_APPA;
        else
                return 0;
}