1 /* tcp_input.c 1.30 81/11/24 */ 2 3 #include "../h/param.h" 4 #include "../h/systm.h" 5 #include "../h/mbuf.h" 6 #include "../h/socket.h" 7 #include "../h/socketvar.h" 8 #include "../net/inet.h" 9 #include "../net/inet_pcb.h" 10 #include "../net/inet_systm.h" 11 #include "../net/imp.h" 12 #include "../net/ip.h" 13 #include "../net/ip_var.h" 14 #include "../net/tcp.h" 15 #include "../net/tcp_fsm.h" 16 #include "../net/tcp_var.h" 17 #include "/usr/include/errno.h" 18 19 int tcpcksum = 1; 20 21 struct sockaddr_in tcp_sockaddr = { AF_INET }; 22 23 /* 24 * TCP input routine, follows pages 65-76 of the 25 * protocol specification dated September, 1981 very closely. 26 */ 27 tcp_input(m0) 28 struct mbuf *m0; 29 { 30 register struct tcpiphdr *ti; 31 struct inpcb *inp; 32 register struct mbuf *m; 33 int len, tlen, off; 34 register struct tcpcb *tp; 35 register int tiflags; 36 struct socket *so; 37 seq_t segend; 38 int acceptable; 39 40 COUNT(TCP_INPUT); 41 /* 42 * Get ip and tcp header together in first mbuf. 43 */ 44 m = m0; 45 if (m->m_len < sizeof (struct tcpiphdr) && 46 m_pullup(m, sizeof (struct tcpiphdr)) == 0) { 47 tcpstat.tcps_hdrops++; 48 goto bad; 49 } 50 ti = mtod(m, struct tcpiphdr *); 51 if (ti->ti_len > sizeof (struct ip)) 52 ip_stripoptions((struct ip *)ti, (char *)0); 53 54 /* 55 * Checksum extended tcp header and data. 56 */ 57 tlen = ((struct ip *)ti)->ip_len; 58 len = sizeof (struct ip) + tlen; 59 if (tcpcksum) { 60 ti->ti_next = ti->ti_prev = 0; 61 ti->ti_x1 = 0; 62 ti->ti_len = htons((u_short)tlen); 63 if ((ti->ti_sum = inet_cksum(m, len)) != 0xffff) { 64 tcpstat.tcps_badsum++; 65 printf("tcp cksum %x\n", ti->ti_sum); 66 goto bad; 67 } 68 } 69 70 /* 71 * Check that tcp offset makes sense, 72 * process tcp options and adjust length. 73 */ 74 off = ti->ti_off << 2; 75 if (off < sizeof (struct tcphdr) || off > ti->ti_len) { 76 tcpstat.tcps_badoff++; 77 goto bad; 78 } 79 ti->ti_len = tlen - off; 80 /* PROCESS OPTIONS */ 81 tiflags = ti->ti_flags; 82 83 /* 84 * Locate pcb for segment. 85 */ 86 inp = in_pcblookup 87 (&tcb, ti->ti_src, ti->ti_sport, ti->ti_dst, ti->ti_dport); 88 89 /* 90 * If the state is CLOSED (i.e., TCB does not exist) then 91 * all data in the incoming segment is discarded. (p. 65). 92 */ 93 if (inp == 0) 94 goto sendreset; 95 tp = intotcpcb(inp); 96 if (tp == 0) 97 goto sendreset; 98 99 /* 100 * Convert tcp protocol specific fields to host format. 101 */ 102 ti->ti_seq = ntohl(ti->ti_seq); 103 ti->ti_ackno = ntohl((n_long)ti->ti_ackno); 104 ti->ti_win = ntohs(ti->ti_win); 105 ti->ti_urp = ntohs(ti->ti_urp); 106 107 /* 108 * Discard ip header, and do tcp input processing. 109 */ 110 off += sizeof (struct ip); 111 m->m_off += off; 112 m->m_len -= off; 113 114 switch (tp->t_state) { 115 116 /* 117 * If the state is LISTEN then ignore segment if it contains an RST. 118 * If the segment contains an ACK then it is bad and send a RST. 119 * If it does not contain a SYN then it is not interesting; drop it. 120 * Otherwise initialize tp->rcv_next, and tp->irs, select an initial 121 * tp->iss, and send a segment: 122 * <SEQ=ISS><ACK=RCV>NXT><CTL=SYN,ACK> 123 * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. 124 * Fill in remote peer address fields if not previously specified. 125 * Enter SYN_RECEIVED state, and process any other fields of this 126 * segment in this state. (p. 65) 127 */ 128 case TCPS_LISTEN: 129 if (tiflags & TH_RST) 130 goto drop; 131 if (tiflags & TH_ACK) 132 goto sendrst; 133 if ((tiflags & TH_SYN) == 0) 134 goto drop; 135 tp->rcv_nxt = ti->ti_seq + 1; 136 tp->irs = ti->ti_seq; 137 tp->iss = tcp_selectiss(); 138 tcp_reflect(ti, tp->iss, tp->rcv_next, TH_SYN|TH_ACK); 139 tp->t_state = TCPS_SYN_RECEIVED; 140 tiflags &= ~TH_SYN; tiflags |= TH_RST; 141 if (inp->inp_faddr.s_addr == 0) { 142 inp->inp_faddr = ti->ti_src; 143 inp->inp_fport = ti->ti_sport; 144 } 145 break; 146 147 /* 148 * If the state is SYN_SENT: 149 * if seg contains an ACK, but not for our SYN, drop the input. 150 * if seg contains a RST, then drop the connection. 151 * if seg does not contain SYN, then drop it. 152 * Otherwise this is an acceptable SYN segment 153 * initialize tp->rcv_nxt and tp->irs 154 * if seg contains ack then advance tp->snd_una 155 * if SYN has been acked change to ESTABLISHED else SYN_RCVD state 156 * arrange for segment to be acked (eventually) 157 * continue processing rest of data/controls, beginning with URG 158 */ 159 case TCPS_SYN_SENT: 160 if ((tiflags & TH_ACK) && 161 (SEQ_LEQ(ti->ti_ack, tp->iss) || 162 SEQ_GT(ti->ti_ack, tp->snd.nxt))) { 163 tcp_reflect(ti, ti->ti_ack, 0, TH_RST); 164 goto drop; 165 } 166 if (tiflags & TH_RST) { 167 if (tiflags & TH_ACK) 168 tcp_drop(tp, ENETRESET); 169 goto drop; 170 } 171 if ((tiflags & TH_SYN) == 0) 172 goto drop; 173 tp->rcv_nxt = ti->ti_seq + 1; 174 tp->irs = ti->ti_seq; 175 tp->snd_una = ti->ti_seq; 176 if (SEQ_GT(tp->snd_una, tp->iss)) { 177 tp->t_state = TCPS_ESTABLISHED; 178 tp->t_flags |= TF_OWEACK; 179 goto step6; 180 } 181 tp->t_state = TCPS_SYN_RECEIVED; 182 tcp_reflect(ti, tp->iss, tp->rcv_nxt, TH_SYN|TH_ACK); 183 break; 184 } 185 186 /* 187 * States other than LISTEN or SYN_SENT. 188 * First check that at least some bytes of segment are within 189 * receive window. 190 */ 191 if (tp->rcv_wnd == 0) { 192 /* 193 * If window is closed can only take segments at 194 * window edge, and have to drop data and EOL from 195 * incoming segments. 196 */ 197 if (tp->rcv_nxt != ti->ti_seq) 198 goto dropafterack; 199 if (tp->ti_len > 0) { 200 tp->ti_len = 0; 201 tp->ti_flags &= ~(TH_EOL|TH_FIN); 202 } 203 } else { 204 /* 205 * If segment begins before rcv_next, drop leading 206 * data (and SYN); if nothing left, just ack. 207 */ 208 if (SEQ_GT(tp->rcv_nxt, ti->ti_seq)) { 209 tcpseq_t todrop = tp->rcv_nxt - ti->ti_seq; 210 if (todrop > ti->ti_len) 211 goto dropafterack; 212 m_adj(m, todrop); 213 ti->ti_seq += todrop; 214 ti->ti_len -= todrop; 215 ti->ti_flags &= ~TH_SYN; 216 } 217 /* 218 * If segment ends after window, drop trailing data 219 * (and EOL and FIN); if there would be nothing left, just ack. 220 */ 221 if (SEQ_GT(ti->ti_seq+ti->ti_len, tp->rcv_nxt+tp->rcv_wnd)) { 222 tcpseq_t todrop = 223 ti->ti_seq+ti->ti_len - (tp->rcv_nxt+tp->rcv_wnd); 224 if (todrop > ti->ti_len) 225 goto dropafterack; 226 m_adj(m, -todrop); 227 ti->ti_len -= todrop; 228 ti->ti_flags &= ~(TH_EOL|TH_FIN); 229 } 230 } 231 232 /* 233 * If the RST bit is set examine the state: 234 * SYN_RECEIVED STATE: 235 * If passive open, return to LISTEN state. 236 * If active open, inform user that connection was refused. 237 * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: 238 * Inform user that connection was reset, and close tcb. 239 * CLOSING, LAST_ACK, TIME_WAIT STATES 240 * Close the tcb. 241 */ 242 if (tiflags&TH_RST) switch (tp->t_state) { 243 244 case TCPS_SYN_RECEIVED: 245 if (inp->inp_socket->so_options & SO_ACCEPTCONN) { 246 tp->t_state = LISTEN; 247 inp->inp_fhost->s_addr = 0; 248 goto drop; 249 } 250 tcp_drop(tp, EREFUSED); 251 goto drop; 252 253 case TCPS_ESTABLISHED: 254 case TCPS_FIN_WAIT_1: 255 case TCPS_FIN_WAIT_2: 256 case TCPS_CLOSE_WAIT: 257 tcp_drop(tp, ECONNRESET); 258 goto drop; 259 260 case TCPS_CLOSING: 261 case TCPS_LAST_ACK: 262 case TCPS_TIME_WAIT: 263 tcp_close(tp); 264 goto drop; 265 } 266 267 /* 268 * If a SYN is in the window, then this is an 269 * error and we send an RST and drop the connection. 270 */ 271 if (tiflags & TH_SYN) { 272 tcp_drop(tp, ECONNRESET); 273 goto sendreset; 274 } 275 276 /* 277 * If the ACK bit is off we drop the segment and return. 278 */ 279 if ((tiflags & TI_ACK) == 0) 280 goto drop; 281 282 /* 283 * Ack processing. 284 */ 285 switch (tp->t_state) { 286 287 /* 288 * In SYN_RECEIVED state if the ack ACKs our SYN then enter 289 * ESTABLISHED state and continue processing, othewise 290 * send an RST. 291 */ 292 case TCPS_SYN_RECEIVED: 293 if (SEQ_LEQ(tp->snd_una, ti->ti_ack) && 294 SEQ_LEQ(ti->ti_ack, tp->snd_nxt)) 295 tp->t_state = TCPS_ESTABLISHED; 296 else 297 goto sendreset; 298 /* fall into next case, below... */ 299 300 /* 301 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range 302 * ACKs. If the ack is in the range 303 * tp->snd_una < ti->ti_ack <= tp->snd_nxt 304 * then advance tp->snd_una to ti->ti_ack and drop 305 * data from the retransmission queue. If this ACK reflects 306 * more up to date window information we update our window information. 307 */ 308 case TCPS_ESTABLISHED: 309 case TCPS_FIN_WAIT_1: 310 case TCPS_FIN_WAIT_2: 311 case TCPS_CLOSE_WAIT: 312 case TCPS_CLOSING: 313 if (SEQ_LT(ti->ti_ack, tp->snd_una)) 314 break; 315 if (SEQ_GT(ti->ti_ack, tp->snd_nxt)) 316 goto dropafterack; 317 sbdrop(&so->so_snd, ti->ti_ack - tp->snd_una); 318 tp->snd_una = ti->ti_ack; 319 if (SEQ_LT(tp->snd_wl1, ti->ti_seq) || 320 tp->snd_wl1==ti-ti_seq && SEQ_LEQ(tp->snd_wl2,ti->ti_seq)) { 321 tp->snd_wnd = ti->ti_win; 322 tp->snd_wl1 = ti->ti_seq; 323 tp->snd_wl2 = ti->ti_ack; 324 } 325 326 switch (tp->t_state) { 327 328 /* 329 * In FIN_WAIT_1 STATE in addition to the processing 330 * for the ESTABLISHED state if our FIN is now acknowledged 331 * then enter FIN_WAIT_2 and continue processing in that state. 332 */ 333 case TCPS_FIN_WAIT_1: 334 if (tcp_finisacked(tp) == 0) 335 break; 336 tp->t_state = TCPS_FIN_WAIT_2; 337 /* fall into ... */ 338 339 /* 340 * In FIN_WAIT_2 STATE in addition to the processing for 341 * the ESTABLISHED state allow the user to close when 342 * the data has drained. 343 */ 344 case TCPS_FIN_WAIT_2: 345 tcp_usrcanclose(tp); 346 break; 347 348 /* 349 * In CLOSING STATE in addition to the processing for 350 * the ESTABLISHED state if the ACK acknowledges our FIN 351 * then enter the TIME-WAIT state, otherwise ignore 352 * the segment. 353 */ 354 case TCPS_CLOSING: 355 if (tcp_finisacked(tp)) 356 tp->t_state = TCPS_TIME_WAIT; 357 break; 358 359 /* 360 * In LAST_ACK state if our FIN is now acknowledged 361 * then enter the TIME_WAIT state, otherwise ignore the 362 * segment. 363 */ 364 case TCPS_LAST_ACK: 365 if (tcp_finisacked(tp)) 366 tcp_close(tp); 367 goto drop; 368 369 /* 370 * In TIME_WAIT state the only thing that should arrive 371 * is a retransmission of the remote FIN. Acknowledge 372 * it and restart the finack timer. 373 */ 374 case TCPS_TIME_WAIT: 375 tp->t_finack = 2 * TCP_MSL; 376 goto dropafterack; 377 } 378 379 step6: 380 /* 381 * If an URG bit is set in the segment and is greater than the 382 * current known urgent pointer, then signal the user that the 383 * remote side has urgent data. This should not happen 384 * in CLOSE_WAIT, CLOSING, LAST-ACK or TIME_WAIT STATES since 385 * a FIN has been received from the remote side. In these states 386 * we ignore the URG. 387 */ 388 if ((tiflags & TH_URG) == 0 && (TCPS_RCVDFIN(tp->t_state) == 0) { 389 if (SEQ_GT(ti->ti_urp, tp->rcv_up) { 390 tp->rcv_up = ti->ti_urp; 391 soisurgendata(so); /* XXX */ 392 } 393 } 394 395 /* 396 * Process the segment text, merging it into the TCP sequencing queue, 397 * and arranging for acknowledgment of receipt if necessary. 398 * This process logically involves adjusting tp->rcv_wnd as data 399 * is presented to the user (this happens in tcp_usrreq.c, 400 * case PRU_RCVD). If a FIN has already been received on this 401 * connection then we just ignore the text. 402 */ 403 if (ti->ti_len) { 404 if (TCPS_RCVDFIN(tp->t_state)) 405 goto drop; 406 tiflags = tcp_reass(tp, ti); 407 else 408 m_freem(m); 409 410 /* 411 * If FIN is received then if we haven't received SYN and 412 * therefore can't validate drop the segment. Otherwise ACK 413 * the FIN and let the user know that the connection is closing. 414 */ 415 if ((tiflags & TH_FIN) && TCPS_HAVERCVDSYN(tp->t_state)) { 416 tcp_usrclosing(tp); 417 tp->t_flags |= TF_ACKNOW; 418 tp->rcv_nxt++; 419 switch (tp->t_state) { 420 421 /* 422 * In SYN_RECEIVED and ESTABLISHED STATES 423 * enter the CLOSE_WAIT state. 424 */ 425 case TCPS_SYN_RECEIVED: 426 case TCPS_ESTABLISHED: 427 tp->t_state = TCPS_CLOSE_WAIT; 428 break; 429 430 /* 431 * In FIN_WAIT_1 STATE if our FIN has been acked then 432 * enter TIME_WAIT state, starting the associated timer 433 * and turning off all other standard timers. 434 * If FIN has not been acked enter the CLOSING state. 435 */ 436 case TCPS_FIN_WAIT_1: 437 if (tcp_finisacked(tp)) { 438 tp->t_state = TCPS_TIME_WAIT; 439 tcp_canceltimers(tp, 0); 440 tp->t_timer[TCPT_FINACK] = TCPSC_2MSL; 441 } else 442 tp->t_state = TCPS_CLOSING; 443 break; 444 } 445 446 /* 447 * In FIN_WAIT_2 state enter the TIME_WAIT state, 448 * starting the time-wait timer, turning off the other 449 * standard timers. 450 */ 451 case TCPS_FIN_WAIT_2: 452 tp->t_state = TCPS_FIN_WAIT_2; 453 tcp_canceltimers(tp, 0); 454 tp->t_timer[TCPT_FINACK] = TCPSC_2MSL; 455 break; 456 457 /* 458 * In TIME_WAIT state restart the 2 MSL time_wait timer. 459 */ 460 case TCPS_TIME_WAIT: 461 tp->t_timer[TCPT_FINACK] = TCPSC_2MSL; 462 break; 463 } 464 return; 465 dropafterack: 466 if ((tiflags & TH_RST) == 0) 467 tcp_reflect(ti, tp->rcv_nxt, tp->snd_nxt, TH_ACK); 468 drop: 469 m_freem(m); 470 return; 471 } 472 473 /* 474 * Insert segment ti into reassembly queue of tcp with 475 * control block tp. Return TH_FIN if reassembly now includes 476 * a segment with FIN. 477 */ 478 tcp_reass(tp, ti, endp) 479 register struct tcpcb *tp; 480 register struct tcpiphdr *ti; 481 int *endp; 482 { 483 register struct tcpiphdr *q; 484 int flags = 0; /* no FIN */ 485 int overage; 486 487 /* 488 * If no data in this segment may want 489 * to move data up to socket structure (if 490 * connection is now established). 491 */ 492 if (ti->ti_len == 0) { 493 m_freem(dtom(ti)); 494 goto present; 495 } 496 497 /* 498 * Find a segment which begins after this one does. 499 */ 500 for (q = tp->seg_next; q != (struct tcpiphdr *)tp; 501 q = (struct tcpiphdr *)q->ti_next) 502 if (SEQ_GT(q->ti_seq, ti->ti_seq)) 503 break; 504 505 /* 506 * If there is a preceding segment, it may provide some of 507 * our data already. If so, drop the data from the incoming 508 * segment. If it provides all of our data, drop us. 509 */ 510 if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) { 511 register int i; 512 q = (struct tcpiphdr *)(q->ti_prev); 513 /* conversion to int (in i) handles seq wraparound */ 514 i = q->ti_seq + q->ti_len - ti->ti_seq; 515 if (i > 0) { 516 if (i >= ti->ti_len) 517 goto drop; 518 m_adj(dtom(tp), i); 519 ti->ti_len -= i; 520 ti->ti_seq += i; 521 } 522 q = (struct tcpiphdr *)(q->ti_next); 523 } 524 525 /* 526 * While we overlap succeeding segments trim them or, 527 * if they are completely covered, dequeue them. 528 */ 529 while (q != (struct tcpiphdr *)tp && 530 SEQ_GT(ti->ti_seq + ti->ti_len, q->ti_seq)) { 531 register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; 532 if (i < q->ti_len) { 533 q->ti_len -= i; 534 m_adj(dtom(q), i); 535 break; 536 } 537 q = (struct tcpiphdr *)q->ti_next; 538 m_freem(dtom(q->ti_prev)); 539 remque(q->ti_prev); 540 } 541 542 /* 543 * Stick new segment in its place. 544 */ 545 insque(ti, q->ti_prev); 546 tp->seqcnt += ti->ti_len; 547 548 /* 549 * Calculate available space and discard segments for 550 * which there is too much. 551 */ 552 overage = 553 (so->so_rcv.sb_cc + tp->seqcnt) - so->so_rcv.sb_hiwat; 554 if (overage > 0) { 555 q = tp->seg_prev; 556 for (;;) { 557 register int i = MIN(q->ti_len, overage); 558 overage -= i; 559 tp->seqcnt -= i; 560 q->ti_len -= i; 561 m_adj(dtom(q), -i); 562 if (q->ti_len) 563 break; 564 if (q == ti) 565 panic("tcp_text dropall"); 566 q = (struct tcpiphdr *)q->ti_prev; 567 remque(q->ti_next); 568 } 569 } 570 571 /* 572 * Advance rcv_next through newly completed sequence space. 573 */ 574 while (ti->ti_seq == tp->rcv_nxt) { 575 tp->rcv_nxt += ti->ti_len; 576 flags = ti->ti_flags & TH_FIN; 577 ti = (struct tcpiphdr *)ti->ti_next; 578 if (ti == (struct tcpiphdr *)tp) 579 break; 580 } 581 582 present: 583 /* 584 * Present data to user. 585 */ 586 if (tp->t_state < ESTAB) 587 return (flags); 588 ti = tp->seg_next; 589 while (ti != (struct tcpiphdr *)tp && ti->ti_seq < tp->rcv_nxt) { 590 remque(ti); 591 sbappend(&so->so_rcv, dtom(ti)); 592 tp->seqcnt -= ti->ti_len; 593 if (tp->seqcnt < 0) 594 panic("tcp_reass"); 595 ti = (struct tcpiphdr *)ti->ti_next; 596 } 597 sorwakeup(so); 598 return (flags); 599 drop: 600 m_freem(dtom(ti)); 601 return (flags); 602 } 603