/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * This file contains functions related to TCP time wait processing. Also
 * refer to the time wait handling comments in tcp_impl.h.
 */

#include <sys/types.h>
#include <sys/strsun.h>
#include <sys/squeue_impl.h>
#include <sys/squeue.h>
#include <sys/callo.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
#include <inet/tcp_cluster.h>

static void tcp_timewait_close(void *, mblk_t *, void *, ip_recv_attr_t *);

/*
 * TCP_TIME_WAIT_DELAY governs how often the time_wait_collector runs.
 * Running it every 5 seconds seems to give the best results.
 */
#define	TCP_TIME_WAIT_DELAY	((hrtime_t)5 * NANOSEC)

/*
 * Remove a connection from the list of detached TIME_WAIT connections.
 * It returns B_FALSE if it can't remove the connection from the list
 * as the connection has already been removed from the list due to an
 * earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE.
 */
boolean_t
tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait)
{
	boolean_t locked = B_FALSE;

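	/*
	 * The caller may pass a NULL tcp_time_wait, in which case we look
	 * up the per-squeue TIME_WAIT state here and take its lock on the
	 * caller's behalf.
	 */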
	if (tcp_time_wait == NULL) {
		tcp_time_wait = *((tcp_squeue_priv_t **)
		    squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP));
		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
		locked = B_TRUE;
	} else {
		ASSERT(MUTEX_HELD(&tcp_time_wait->tcp_time_wait_lock));
	}

	/* 0 means that the tcp_t has not been added to the time wait list. */
	if (tcp->tcp_time_wait_expire == 0) {
		ASSERT(tcp->tcp_time_wait_next == NULL);
		ASSERT(tcp->tcp_time_wait_prev == NULL);
		if (locked)
			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
		return (B_FALSE);
	}
	ASSERT(TCP_IS_DETACHED(tcp));
	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);

	if (tcp == tcp_time_wait->tcp_time_wait_head) {
		ASSERT(tcp->tcp_time_wait_prev == NULL);
		tcp_time_wait->tcp_time_wait_head = tcp->tcp_time_wait_next;
		if (tcp_time_wait->tcp_time_wait_head != NULL) {
			tcp_time_wait->tcp_time_wait_head->tcp_time_wait_prev =
			    NULL;
		} else {
			tcp_time_wait->tcp_time_wait_tail = NULL;
		}
	} else if (tcp == tcp_time_wait->tcp_time_wait_tail) {
		ASSERT(tcp->tcp_time_wait_next == NULL);
		tcp_time_wait->tcp_time_wait_tail = tcp->tcp_time_wait_prev;
		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = NULL;
	} else {
		ASSERT(tcp->tcp_time_wait_prev->tcp_time_wait_next == tcp);
		ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == tcp);
		tcp->tcp_time_wait_prev->tcp_time_wait_next =
		    tcp->tcp_time_wait_next;
		tcp->tcp_time_wait_next->tcp_time_wait_prev =
		    tcp->tcp_time_wait_prev;
	}
	tcp->tcp_time_wait_next = NULL;
	tcp->tcp_time_wait_prev = NULL;
	tcp->tcp_time_wait_expire = 0;

	if (locked)
		mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
	return (B_TRUE);
}

/*
 * Add a connection to the list of detached TIME_WAIT connections
 * and set its time to expire.
 */
void
tcp_time_wait_append(tcp_t *tcp)
{
	tcp_stack_t *tcps = tcp->tcp_tcps;
	squeue_t *sqp = tcp->tcp_connp->conn_sqp;
	tcp_squeue_priv_t *tcp_time_wait =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	tcp_timers_stop(tcp);

	/* Freed above */
	ASSERT(tcp->tcp_timer_tid == 0);
	ASSERT(tcp->tcp_ack_tid == 0);

	/* must have happened at the time of detaching the tcp */
	ASSERT(tcp->tcp_ptpahn == NULL);
	ASSERT(tcp->tcp_flow_stopped == 0);
	ASSERT(tcp->tcp_time_wait_next == NULL);
	ASSERT(tcp->tcp_time_wait_prev == NULL);
	ASSERT(tcp->tcp_time_wait_expire == 0);
	ASSERT(tcp->tcp_listener == NULL);

	tcp->tcp_time_wait_expire = ddi_get_lbolt64();
	/*
	 * Since tcp_time_wait_expire is lbolt64, it should not wrap around
	 * in practice. Hence it cannot be 0. Note that zero means that the
	 * tcp_t is not in the TIME_WAIT list.
	 */
	tcp->tcp_time_wait_expire += MSEC_TO_TICK(
	    tcps->tcps_time_wait_interval);

	ASSERT(TCP_IS_DETACHED(tcp));
	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
	ASSERT(tcp->tcp_time_wait_next == NULL);
	ASSERT(tcp->tcp_time_wait_prev == NULL);
	TCP_DBGSTAT(tcps, tcp_time_wait);

	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	if (tcp_time_wait->tcp_time_wait_head == NULL) {
		ASSERT(tcp_time_wait->tcp_time_wait_tail == NULL);
		tcp_time_wait->tcp_time_wait_head = tcp;

		/*
		 * Even if the list was empty before, there may be a timer
		 * running since a tcp_t can be removed from the list
		 * in other places, such as tcp_clean_death(). So check if
		 * a timer is needed.
		 */
		if (tcp_time_wait->tcp_time_wait_tid == 0) {
			tcp_time_wait->tcp_time_wait_tid =
			    timeout_generic(CALLOUT_NORMAL,
			    tcp_time_wait_collector, sqp,
			    (hrtime_t)(tcps->tcps_time_wait_interval + 1) *
			    MICROSEC, CALLOUT_TCP_RESOLUTION,
			    CALLOUT_FLAG_ROUNDUP);
		}
	} else {
		/*
		 * The list is not empty, so a timer must be running. If not,
		 * tcp_time_wait_collector() must be running on this
		 * tcp_time_wait list at the same time.
		 */
		ASSERT(tcp_time_wait->tcp_time_wait_tid != 0 ||
		    tcp_time_wait->tcp_time_wait_running);
		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
		ASSERT(tcp_time_wait->tcp_time_wait_tail->tcp_state ==
		    TCPS_TIME_WAIT);
		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = tcp;
		tcp->tcp_time_wait_prev = tcp_time_wait->tcp_time_wait_tail;
	}
	tcp_time_wait->tcp_time_wait_tail = tcp;
	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
}

/*
 * Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT
 * tcp_t. Used in tcp_time_wait_collector().
 */
/* ARGSUSED */
static void
tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
{
	conn_t *connp = (conn_t *)arg;
	tcp_t *tcp = connp->conn_tcp;

	ASSERT(tcp != NULL);
	if (tcp->tcp_state == TCPS_CLOSED) {
		return;
	}

	ASSERT((connp->conn_family == AF_INET &&
	    connp->conn_ipversion == IPV4_VERSION) ||
	    (connp->conn_family == AF_INET6 &&
	    (connp->conn_ipversion == IPV4_VERSION ||
	    connp->conn_ipversion == IPV6_VERSION)));
	ASSERT(!tcp->tcp_listener);

	ASSERT(TCP_IS_DETACHED(tcp));

	/*
	 * Because they have no upstream client to rebind or tcp_close()
	 * them later, we axe the connection here and now.
	 */
	tcp_close_detached(tcp);
}

/*
 * Blows away all tcps whose TIME_WAIT has expired. List traversal is done
 * forwards from the head. This walks all stack instances since
 * tcp_time_wait remains global across all stacks.
 */
/* ARGSUSED */
void
tcp_time_wait_collector(void *arg)
{
	tcp_t *tcp;
	int64_t now;
	mblk_t *mp;
	conn_t *connp;
	kmutex_t *lock;
	boolean_t removed;
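	/*
	 * Clustering disconnect hook. When a callback is registered, the
	 * fast-path reclaim below must be skipped (see the comment there).
	 */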
	extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t,
	    uint8_t *, in_port_t, uint8_t *, in_port_t, void *);

	squeue_t *sqp = (squeue_t *)arg;
	tcp_squeue_priv_t *tcp_time_wait =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	tcp_time_wait->tcp_time_wait_tid = 0;
#ifdef DEBUG
	tcp_time_wait->tcp_time_wait_running = B_TRUE;
#endif

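	/*
	 * If the head of the free list is still marked from the previous
	 * collector run, the cached tcp_t structures have gone unused for a
	 * full interval; drop their references so they can be freed.
	 */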
	if (tcp_time_wait->tcp_free_list != NULL &&
	    tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) {
		TCP_G_STAT(tcp_freelist_cleanup);
		while ((tcp = tcp_time_wait->tcp_free_list) != NULL) {
			tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
			tcp->tcp_time_wait_next = NULL;
			tcp_time_wait->tcp_free_list_cnt--;
			ASSERT(tcp->tcp_tcps == NULL);
			CONN_DEC_REF(tcp->tcp_connp);
		}
		ASSERT(tcp_time_wait->tcp_free_list_cnt == 0);
	}

	/*
	 * In order to reap time waits reliably, we should use a
	 * source of time that is not adjustable by the user -- hence
	 * the call to ddi_get_lbolt64().
	 */
	now = ddi_get_lbolt64();
	while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) {
		/*
		 * lbolt64 should not wrap around in practice... So we can
		 * do a direct comparison.
		 */
		if (now < tcp->tcp_time_wait_expire)
			break;

		removed = tcp_time_wait_remove(tcp, tcp_time_wait);
		ASSERT(removed);

		connp = tcp->tcp_connp;
		ASSERT(connp->conn_fanout != NULL);
		lock = &connp->conn_fanout->connf_lock;
		/*
		 * This is essentially a TW reclaim fast path optimization for
		 * performance where the timewait collector checks under the
		 * fanout lock (so that no one else can get access to the
		 * conn_t) that the refcnt is 2 i.e. one for TCP and one for
		 * the classifier hash list. If ref count is indeed 2, we can
		 * just remove the conn under the fanout lock and avoid
		 * cleaning up the conn under the squeue, provided that
		 * clustering callbacks are not enabled. If clustering is
		 * enabled, we need to make the clustering callback before
		 * setting the CONDEMNED flag and after dropping all locks and
		 * so we forego this optimization and fall back to the slow
		 * path. Also please see the comments in tcp_closei_local
		 * regarding the refcnt logic.
		 *
		 * Since we are holding the tcp_time_wait_lock, it's better
		 * not to block on the fanout_lock because other connections
		 * can't add themselves to the time_wait list while we hold
		 * it. So we do a tryenter instead of mutex_enter.
		 */
		if (mutex_tryenter(lock)) {
			mutex_enter(&connp->conn_lock);
			if ((connp->conn_ref == 2) &&
			    (cl_inet_disconnect == NULL)) {
				ipcl_hash_remove_locked(connp,
				    connp->conn_fanout);
				/*
				 * Set the CONDEMNED flag now itself so that
				 * the refcnt cannot increase due to any
				 * walker.
				 */
				connp->conn_state_flags |= CONN_CONDEMNED;
				mutex_exit(lock);
				mutex_exit(&connp->conn_lock);
				if (tcp_time_wait->tcp_free_list_cnt <
				    tcp_free_list_max_cnt) {
					/* Add to head of tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_cleanup(tcp);
					ASSERT(connp->conn_latch == NULL);
					ASSERT(connp->conn_policy == NULL);
					ASSERT(tcp->tcp_tcps == NULL);
					ASSERT(connp->conn_netstack == NULL);

					mutex_enter(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp->tcp_time_wait_next =
					    tcp_time_wait->tcp_free_list;
					tcp_time_wait->tcp_free_list = tcp;
					tcp_time_wait->tcp_free_list_cnt++;
					continue;
				} else {
					/* Do not add to tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_bind_hash_remove(tcp);
					ixa_cleanup(tcp->tcp_connp->conn_ixa);
					tcp_ipsec_cleanup(tcp);
					CONN_DEC_REF(tcp->tcp_connp);
				}
			} else {
				CONN_INC_REF_LOCKED(connp);
				mutex_exit(lock);
				mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
				mutex_exit(&connp->conn_lock);
				/*
				 * We can reuse the closemp here since conn has
				 * detached (otherwise we wouldn't even be in
				 * time_wait list). tcp_closemp_used can safely
				 * be changed without taking a lock as no other
				 * thread can concurrently access it at this
				 * point in the connection lifecycle.
				 */

				if (tcp->tcp_closemp.b_prev == NULL)
					tcp->tcp_closemp_used = B_TRUE;
				else
					cmn_err(CE_PANIC,
					    "tcp_timewait_collector: "
					    "concurrent use of tcp_closemp: "
					    "connp %p tcp %p\n", (void *)connp,
					    (void *)tcp);

				TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
				mp = &tcp->tcp_closemp;
				SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
				    tcp_timewait_close, connp, NULL,
				    SQ_FILL, SQTAG_TCP_TIMEWAIT);
			}
		} else {
			mutex_enter(&connp->conn_lock);
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
			mutex_exit(&connp->conn_lock);
			/*
			 * We can reuse the closemp here since conn has
			 * detached (otherwise we wouldn't even be in
			 * time_wait list). tcp_closemp_used can safely
			 * be changed without taking a lock as no other
			 * thread can concurrently access it at this
			 * point in the connection lifecycle.
			 */

			if (tcp->tcp_closemp.b_prev == NULL)
				tcp->tcp_closemp_used = B_TRUE;
			else
				cmn_err(CE_PANIC, "tcp_timewait_collector: "
				    "concurrent use of tcp_closemp: "
				    "connp %p tcp %p\n", (void *)connp,
				    (void *)tcp);

			TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
			mp = &tcp->tcp_closemp;
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
			    tcp_timewait_close, connp, NULL,
			    SQ_FILL, SQTAG_TCP_TIMEWAIT);
		}
		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	}

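	/*
	 * Mark the head of the free list; if the mark is still present on
	 * the next collector run, the cached tcp_t structures will be
	 * reclaimed above.
	 */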
	if (tcp_time_wait->tcp_free_list != NULL)
		tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE;

	/*
	 * If the time wait list is not empty and there is no timer running,
	 * restart it.
	 */
	if ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL &&
	    tcp_time_wait->tcp_time_wait_tid == 0) {
		hrtime_t firetime;

		firetime = TICK_TO_NSEC(tcp->tcp_time_wait_expire - now);
		/* This ensures that we won't wake up too often. */
		firetime = MAX(TCP_TIME_WAIT_DELAY, firetime);
		tcp_time_wait->tcp_time_wait_tid =
		    timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector,
		    sqp, firetime, CALLOUT_TCP_RESOLUTION,
		    CALLOUT_FLAG_ROUNDUP);
	}
#ifdef DEBUG
	tcp_time_wait->tcp_time_wait_running = B_FALSE;
#endif
	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
}

/*
 * tcp_time_wait_processing() handles processing of incoming packets when
 * the tcp_t is in the TIME_WAIT state.
 *
 * A TIME_WAIT tcp_t that has an associated open TCP end point (not in
 * detached state) is never put on the time wait list.
 */
void
tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
    uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira)
{
	int32_t bytes_acked;
	int32_t gap;
	int32_t rgap;
	tcp_opt_t tcpopt;
	uint_t flags;
	uint32_t new_swnd = 0;
	conn_t *nconnp;
	conn_t *connp = tcp->tcp_connp;
	tcp_stack_t *tcps = tcp->tcp_tcps;

	BUMP_LOCAL(tcp->tcp_ibsegs);
	DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp);

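	/*
	 * Extract the TCP flags and compute the peer's advertised send
	 * window. The window scale factor is not applied to SYN segments,
	 * on which scaling is not yet in effect.
	 */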
	flags = (unsigned int)tcpha->tha_flags & 0xFF;
	new_swnd = ntohs(tcpha->tha_win) <<
	    ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws);
	if (tcp->tcp_snd_ts_ok) {
		if (!tcp_paws_check(tcp, tcpha, &tcpopt)) {
			tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
			    tcp->tcp_rnxt, TH_ACK);
			goto done;
		}
	}
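	/*
	 * gap is the offset of the segment's start from the next expected
	 * sequence number (negative means the segment begins with
	 * already-received data). rgap is how much of the segment fits in
	 * the receive window (negative means it extends past the right
	 * edge).
	 */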
	gap = seg_seq - tcp->tcp_rnxt;
	rgap = tcp->tcp_rwnd - (gap + seg_len);
	if (gap < 0) {
		TCPS_BUMP_MIB(tcps, tcpInDataDupSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes,
		    (seg_len > -gap ? -gap : seg_len));
		seg_len += gap;
		if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) {
			if (flags & TH_RST) {
				goto done;
			}
			if ((flags & TH_FIN) && seg_len == -1) {
				/*
				 * When TCP receives a duplicate FIN in
				 * TIME_WAIT state, restart the 2 MSL timer.
				 * See page 73 in RFC 793. Make sure this TCP
				 * is already on the TIME_WAIT list. If not,
				 * just restart the timer.
				 */
				if (TCP_IS_DETACHED(tcp)) {
					if (tcp_time_wait_remove(tcp, NULL) ==
					    B_TRUE) {
						tcp_time_wait_append(tcp);
						TCP_DBGSTAT(tcps,
						    tcp_rput_time_wait);
					}
				} else {
					ASSERT(tcp != NULL);
					TCP_TIMER_RESTART(tcp,
					    tcps->tcps_time_wait_interval);
				}
				tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
				    tcp->tcp_rnxt, TH_ACK);
				goto done;
			}
			flags |= TH_ACK_NEEDED;
			seg_len = 0;
			goto process_ack;
		}

		/* Fix seg_seq, and chew the gap off the front. */
		seg_seq = tcp->tcp_rnxt;
	}

	if ((flags & TH_SYN) && gap > 0 && rgap < 0) {
		/*
		 * Make sure that when we accept the connection, pick
		 * an ISS greater than (tcp_snxt + ISS_INCR/2) for the
		 * old connection.
		 *
		 * The next ISS generated is equal to tcp_iss_incr_extra
		 * + ISS_INCR/2 + other components depending on the
		 * value of tcp_strong_iss. We pre-calculate the new
		 * ISS here and compare with tcp_snxt to determine if
		 * we need to make adjustment to tcp_iss_incr_extra.
		 *
		 * The above calculation is ugly and is a
		 * waste of CPU cycles...
		 */
		uint32_t new_iss = tcps->tcps_iss_incr_extra;
		int32_t adj;
		ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;

		switch (tcps->tcps_strong_iss) {
		case 2: {
			/* Add time and MD5 components. */
			uint32_t answer[4];
			struct {
				uint32_t ports;
				in6_addr_t src;
				in6_addr_t dst;
			} arg;
			MD5_CTX context;

			mutex_enter(&tcps->tcps_iss_key_lock);
			context = tcps->tcps_iss_key;
			mutex_exit(&tcps->tcps_iss_key_lock);
			arg.ports = connp->conn_ports;
			/* We use MAPPED addresses in tcp_iss_init */
			arg.src = connp->conn_laddr_v6;
			arg.dst = connp->conn_faddr_v6;
			MD5Update(&context, (uchar_t *)&arg,
			    sizeof (arg));
			MD5Final((uchar_t *)answer, &context);
			answer[0] ^= answer[1] ^ answer[2] ^ answer[3];
			new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0];
			break;
		}
		case 1:
			/* Add time component and min random (i.e. 1). */
			new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1;
			break;
		default:
			/* Add only time component. */
			new_iss += (uint32_t)gethrestime_sec() * ISS_INCR;
			break;
		}
		if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) {
			/*
			 * New ISS not guaranteed to be ISS_INCR/2
			 * ahead of the current tcp_snxt, so add the
			 * difference to tcp_iss_incr_extra.
			 */
			tcps->tcps_iss_incr_extra += adj;
		}
		/*
		 * If tcp_clean_death() can not perform the task now,
		 * drop the SYN packet and let the other side re-xmit.
		 * Otherwise pass the SYN packet back in, since the
		 * old tcp state has been cleaned up or freed.
		 */
		if (tcp_clean_death(tcp, 0) == -1)
			goto done;
		nconnp = ipcl_classify(mp, ira, ipst);
		if (nconnp != NULL) {
			TCP_STAT(tcps, tcp_time_wait_syn_success);
			/* Drops ref on nconnp */
			tcp_reinput(nconnp, mp, ira, ipst);
			return;
		}
		goto done;
	}

	/*
	 * A negative rgap means that the segment extends past the right
	 * edge of the receive window; -rgap is the number of out-of-window
	 * bytes.
	 */
	if (rgap < 0) {
		TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap);
		/* Fix seg_len and make sure there is something left. */
		seg_len += rgap;
		if (seg_len <= 0) {
			if (flags & TH_RST) {
				goto done;
			}
			flags |= TH_ACK_NEEDED;
			seg_len = 0;
			goto process_ack;
		}
	}
	/*
	 * Check whether we can update tcp_ts_recent. This test is
	 * NOT the one in RFC 1323 3.4. It is from Braden, 1993, "TCP
	 * Extensions for High Performance: An Update", Internet Draft.
	 */
	if (tcp->tcp_snd_ts_ok &&
	    TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) &&
	    SEQ_LEQ(seg_seq, tcp->tcp_rack)) {
		tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val;
		tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64();
	}

	if (seg_seq != tcp->tcp_rnxt && seg_len > 0) {
		/* Always ack out of order packets */
		flags |= TH_ACK_NEEDED;
		seg_len = 0;
	} else if (seg_len > 0) {
		TCPS_BUMP_MIB(tcps, tcpInClosed);
		TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len);
	}
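	/*
	 * A valid RST received in TIME_WAIT tears down the connection
	 * immediately (RFC 793).
	 */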
	if (flags & TH_RST) {
		(void) tcp_clean_death(tcp, 0);
		goto done;
	}
	if (flags & TH_SYN) {
		tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1,
		    TH_RST|TH_ACK);
		/*
		 * Do not delete the TCP structure if it is in
		 * TIME_WAIT state. Refer to RFC 1122, 4.2.2.13.
		 */
		goto done;
	}
process_ack:
	if (flags & TH_ACK) {
		bytes_acked = (int)(seg_ack - tcp->tcp_suna);
		if (bytes_acked <= 0) {
			if (bytes_acked == 0 && seg_len == 0 &&
			    new_swnd == tcp->tcp_swnd)
				TCPS_BUMP_MIB(tcps, tcpInDupAck);
		} else {
			/* Acks something not sent */
			flags |= TH_ACK_NEEDED;
		}
	}
	if (flags & TH_ACK_NEEDED) {
		/*
		 * Time to send an ack for some reason.
		 */
		tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
		    tcp->tcp_rnxt, TH_ACK);
	}
done:
	freemsg(mp);
}