1*25202Skarels #ifdef	RCSIDENT
2*25202Skarels static char rcsident[] = "$Header: tcp_prim.c,v 1.23 85/07/31 09:34:04 walsh Exp $";
3*25202Skarels #endif
4*25202Skarels 
5*25202Skarels #include "../h/param.h"
6*25202Skarels #include "../h/errno.h"
7*25202Skarels #include "../h/mbuf.h"
8*25202Skarels #include "../h/socket.h"
9*25202Skarels #include "../h/socketvar.h"
10*25202Skarels #include "../h/protosw.h"
11*25202Skarels 
12*25202Skarels #include "../net/if.h"
13*25202Skarels #include "../net/route.h"
14*25202Skarels 
15*25202Skarels #include "../bbnnet/in.h"
16*25202Skarels #include "../bbnnet/in_pcb.h"
17*25202Skarels #include "../bbnnet/in_var.h"
18*25202Skarels #include "../bbnnet/net.h"
19*25202Skarels #include "../bbnnet/fsm.h"
20*25202Skarels #include "../bbnnet/tcp.h"
21*25202Skarels #include "../bbnnet/seq.h"
22*25202Skarels #include "../bbnnet/ip.h"
23*25202Skarels #include "../bbnnet/macros.h"
24*25202Skarels #include "../bbnnet/sws.h"
25*25202Skarels 
26*25202Skarels extern struct inpcb tcp;
27*25202Skarels 
28*25202Skarels /*
29*25202Skarels  * TCP finite state machine primitives
30*25202Skarels  *
31*25202Skarels  * These routines are called from the procedures in tcp_procs.c to do low
32*25202Skarels  * level protocol functions.
33*25202Skarels  */
34*25202Skarels 
35*25202Skarels /*
36*25202Skarels  * The hope is that a bcopy will fill in most tcp/ip header fields quicker
37*25202Skarels  * than a step by step stuffing of each individually when we have to send
38*25202Skarels  * some info.
39*25202Skarels  */
tcp_template(tp)40*25202Skarels struct th *tcp_template(tp)
41*25202Skarels struct tcpcb	*tp;
42*25202Skarels {
43*25202Skarels     register struct mbuf	*m;
44*25202Skarels     register struct th	*t;
45*25202Skarels     register struct inpcb	*inp;
46*25202Skarels 
47*25202Skarels     m = m_getclr(M_WAIT, MT_HEADER);
48*25202Skarels     if (m == NULL)
49*25202Skarels 	return ((struct th *) NULL);
50*25202Skarels     m->m_len = sizeof (struct th);
51*25202Skarels     t = mtod(m, struct th *);
52*25202Skarels     inp = tp->t_in_pcb;
53*25202Skarels 
54*25202Skarels     t->t_pr = IPPROTO_TCP;
55*25202Skarels     t->t_s = inp->inp_laddr;
56*25202Skarels     t->t_d = inp->inp_faddr;
57*25202Skarels     t->t_src = inp->inp_lport;
58*25202Skarels     t->t_dst = inp->inp_fport;
59*25202Skarels     t->t_off = TCPSIZE >> TCP_OFFSHIFT;
60*25202Skarels 
61*25202Skarels     return (t);
62*25202Skarels }
63*25202Skarels 
64*25202Skarels #ifdef GPROF
65*25202Skarels long tcp_pkt_size[2*1024];
66*25202Skarels #endif
67*25202Skarels 
68*25202Skarels /*
69*25202Skarels  * Send a tcp segment
70*25202Skarels  */
send_pkt(tp,flags,len)71*25202Skarels send_pkt(tp, flags, len)
72*25202Skarels register struct tcpcb *tp;
73*25202Skarels register int flags;
74*25202Skarels register int len;	/* in sequence units: includes SYN, FIN */
75*25202Skarels {
76*25202Skarels     register struct th *t;
77*25202Skarels     register struct inpcb *inp;
78*25202Skarels     register struct mbuf *m;
79*25202Skarels     struct mbuf *dat;
80*25202Skarels     int i;
81*25202Skarels     struct sockbuf *sorcv;
82*25202Skarels     short *p;
83*25202Skarels     struct th *tmpt;
84*25202Skarels 
85*25202Skarels     inp = tp->t_in_pcb;
86*25202Skarels 
87*25202Skarels     /*
88*25202Skarels      * Adjust data length for SYN and FIN.
89*25202Skarels      */
90*25202Skarels     if (flags & T_FIN)
91*25202Skarels 	len--;
92*25202Skarels     if (flags & T_SYN)
93*25202Skarels 	len--;
94*25202Skarels 
95*25202Skarels     /*
96*25202Skarels      * and get a copy of the data for this transmission
97*25202Skarels      */
98*25202Skarels     dat = (struct mbuf *) NULL;
99*25202Skarels     if (len > 0)
100*25202Skarels     {
101*25202Skarels 	int	off;
102*25202Skarels 
103*25202Skarels 	off = tp->snd_nxt - tp->snd_una;
104*25202Skarels 	if (! tp->syn_acked)
105*25202Skarels 	    if (! (flags & T_SYN))
106*25202Skarels 		off --;
107*25202Skarels 	if ((dat = m_copy (inp->inp_socket->so_snd.sb_mb, off, len)) == NULL)
108*25202Skarels 	    return (ENOBUFS);
109*25202Skarels     }
110*25202Skarels 
111*25202Skarels #ifdef MBUF_DEBUG
112*25202Skarels     m = m_get(M_WAIT, MT_HEADER);
113*25202Skarels #else
114*25202Skarels     MGET(m, M_WAIT, MT_HEADER);
115*25202Skarels #endif
116*25202Skarels     if (m == NULL)
117*25202Skarels 	return(ENOBUFS);
118*25202Skarels     /*
119*25202Skarels      * Build tcp leader at bottom of new buffer to leave room for lower
120*25202Skarels      * level leaders.  Leave an extra four bytes for TCP max segment size
121*25202Skarels      * option, which is sent in SYN packets.
122*25202Skarels      * Align header for memory access speed in checksumming...
123*25202Skarels      */
124*25202Skarels     m->m_off = (MMAXOFF - sizeof(struct th) - TCP_MAXSEG_OPTLEN) &
125*25202Skarels 	~(sizeof(long) -1);
126*25202Skarels     m->m_len = sizeof(struct th);
127*25202Skarels     m->m_next = dat;
128*25202Skarels     t = mtod(m, struct th *);
129*25202Skarels 
130*25202Skarels     if (tp->t_template == NULL)
131*25202Skarels 	panic("send_pkt");
132*25202Skarels     bcopy((caddr_t)tp->t_template, (caddr_t)t, sizeof (struct th));
133*25202Skarels 
134*25202Skarels #ifndef NOTCPOPTS
135*25202Skarels     /*
136*25202Skarels      * Insert maximum segment size option for SYN.
137*25202Skarels      */
138*25202Skarels     if (flags & T_SYN)
139*25202Skarels     {
140*25202Skarels 	/*
141*25202Skarels 	 * may not have a route yet.
142*25202Skarels 	 */
143*25202Skarels 	if (inp->inp_route.ro_rt)
144*25202Skarels 	{
145*25202Skarels 	    m->m_len += TCP_MAXSEG_OPTLEN;
146*25202Skarels 	    /* SYN occupies seq space */
147*25202Skarels 	    len += TCP_MAXSEG_OPTLEN;
148*25202Skarels 	    t->t_off = (TCPSIZE + TCP_MAXSEG_OPTLEN) >> TCP_OFFSHIFT;
149*25202Skarels 	    p = (short *)((int)t + sizeof(struct th));
150*25202Skarels 	    *p++ = htons((u_short)TCP_MAXSEG_OPTHDR);
151*25202Skarels 	    *p = htons((u_short)inp->inp_route.ro_rt->rt_ifp->if_mtu
152*25202Skarels 		- TCPIPMAX);
153*25202Skarels 	}
154*25202Skarels     }
155*25202Skarels #endif
156*25202Skarels 
157*25202Skarels     t->t_len   = htons((u_short)len + TCPSIZE);
158*25202Skarels     t->t_seq   = htonl(tp->snd_nxt);
159*25202Skarels     t->t_ackno = htonl(tp->rcv_nxt);
160*25202Skarels 
161*25202Skarels #ifdef GPROF
162*25202Skarels     if (len < (sizeof(tcp_pkt_size)/sizeof(tcp_pkt_size[0])) - TCPSIZE)
163*25202Skarels 	tcp_pkt_size[len+TCPSIZE] ++;
164*25202Skarels     else
165*25202Skarels 	tcp_pkt_size[0] ++;
166*25202Skarels #endif
167*25202Skarels 
168*25202Skarels     if (tp->snd_rst)
169*25202Skarels     {
170*25202Skarels 	flags |= T_RST;
171*25202Skarels 	flags &= ~T_SYN;
172*25202Skarels     }
173*25202Skarels 
174*25202Skarels     if (tp->snd_urg)
175*25202Skarels     {
176*25202Skarels 	if (SEQ_GT(tp->snd_urp, tp->snd_nxt))
177*25202Skarels 	{
178*25202Skarels 	    short	up;
179*25202Skarels 
180*25202Skarels 	    /*
181*25202Skarels 	     * SEQ_LEQ(tp->snd_urp, tp->snd_nxt+len)
182*25202Skarels 	     * Strictly speaking, we should be able to have the
183*25202Skarels 	     * urgent pointer point into the stream beyond the
184*25202Skarels 	     * current end of packet, but this is just in case
185*25202Skarels 	     * some implementation is not ready for that.
186*25202Skarels 	     */
187*25202Skarels 	    flags |= T_URG;
188*25202Skarels 	    up = MIN(len -1, tp->snd_urp - tp->snd_nxt -1);
189*25202Skarels 	    t->t_urp = htons((u_short)up);
190*25202Skarels 	}
191*25202Skarels     }
192*25202Skarels 
193*25202Skarels     if (tp->syn_rcvd)
194*25202Skarels     {
195*25202Skarels #if T_DELACK > 0
196*25202Skarels 	tp->lastack = tp->rcv_nxt;
197*25202Skarels #endif
198*25202Skarels 	flags |= T_ACK;
199*25202Skarels     }
200*25202Skarels 
201*25202Skarels     t->t_flags = flags;
202*25202Skarels 
203*25202Skarels     /* Calculate the window we should advertise */
204*25202Skarels 
205*25202Skarels     sorcv = &inp->inp_socket->so_rcv;
206*25202Skarels     /*
207*25202Skarels      * Count bytes left in user rcv queue, and reduce by sequencing queue
208*25202Skarels      * Counting the sequencing q contracts the window when packets are
209*25202Skarels      * lost (== when the network is over-loaded).
210*25202Skarels      */
211*25202Skarels     i = sbspace(sorcv) - tp->t_rcv_len;
212*25202Skarels     /*
213*25202Skarels      * apply receiver's solution to SWS in case sender does not have such
214*25202Skarels      * an algorithm.  One 8th was determined by benchmarks writing 2k
215*25202Skarels      * buffers on an Ethernet connection.  It has a slightly negative
216*25202Skarels      * influence on IMP networks when writing 1k buffers.
217*25202Skarels      *
218*25202Skarels      * (sorcv->sb_hiwat >> 3) limited by 256 == 2k / 8, since application
219*25202Skarels      * may adjust its buffering to large amounts via ioctl call.  An eighth
220*25202Skarels      * of a large number may be a reasonable sized packet to send.
221*25202Skarels      *
222*25202Skarels      * Only apply this algorithm if are getting packets in order,
223*25202Skarels      * so that advertisement of 0 window does not prevent retransmission
224*25202Skarels      * of dropped packet.
225*25202Skarels      */
226*25202Skarels     tmpt = tp->t_rcv_next;
227*25202Skarels     if ((i < MIN(256, (sorcv->sb_hiwat >> 3))) &&
228*25202Skarels 	((tmpt == (struct th *) tp) || SEQ_LEQ(tmpt->t_seq, tp->rcv_nxt)))
229*25202Skarels 	i = 0;
230*25202Skarels     else
231*25202Skarels     {
232*25202Skarels 	/*
233*25202Skarels 	 * if this connection gets fragmented, constrain the windowsize
234*25202Skarels 	 */
235*25202Skarels 	if (tp->t_maxfrag)
236*25202Skarels 	    i = MIN(i, tp->t_maxfrag*3);
237*25202Skarels 
238*25202Skarels 	if (i < 0)
239*25202Skarels 	    i = 0;
240*25202Skarels     }
241*25202Skarels 
242*25202Skarels #if T_DELACK > 0
243*25202Skarels     tp->rcv_wnd = i;
244*25202Skarels #endif
245*25202Skarels     t->t_win = htons((u_short)i);
246*25202Skarels     /*
247*25202Skarels      * If we sent a zero window, we should try to send a non-zero ACK ASAP.
248*25202Skarels      */
249*25202Skarels     if (i == 0)
250*25202Skarels 	tp->sent_zero = TRUE;
251*25202Skarels     else
252*25202Skarels 	tp->sent_zero = FALSE;
253*25202Skarels 
254*25202Skarels     t->t_sum = in_cksum(m, len + sizeof(struct th));
255*25202Skarels 
256*25202Skarels     if (inp->inp_socket->so_options & SO_DEBUG)
257*25202Skarels     {
258*25202Skarels 	struct work w;
259*25202Skarels 
260*25202Skarels 	w.w_type = INRECV;	/* not really. use -1 newstate */
261*25202Skarels 	w.w_tcb  = tp;
262*25202Skarels 	w.w_dat  = (char *)t;
263*25202Skarels 	tcp_debug(tp, &w, -1);
264*25202Skarels     }
265*25202Skarels 
266*25202Skarels     /*
267*25202Skarels      * and ship packet off via IP.  Remember that since this protocol
268*25202Skarels      * involves retransmissions, errors can occur asynchronous to a
269*25202Skarels      * (write) system call, and that therefore we can not send the
270*25202Skarels      * error all the way back up through subroutine return values.  We
271*25202Skarels      * must also post it back via advise_user() at some point, and this
272*25202Skarels      * looks like a good point to try it.
273*25202Skarels      */
274*25202Skarels     {
275*25202Skarels 	int	error;
276*25202Skarels 
277*25202Skarels 	error = ip_send(inp, m, len+TCPSIZE, FALSE);
278*25202Skarels 	if (error)
279*25202Skarels 	    /*
280*25202Skarels 	     * Since we use retransmissions, don't need to tell user
281*25202Skarels 	     * process about this.  (Can be as simple as interface
282*25202Skarels 	     * or host structure queues are too long due to current
283*25202Skarels 	     * heavy traffic.  Backing off will take care of that.)
284*25202Skarels 	     */
285*25202Skarels 	    if (error != ENOBUFS)
286*25202Skarels 		advise_user(inp->inp_socket, error);
287*25202Skarels 	return (error);
288*25202Skarels     }
289*25202Skarels }
290*25202Skarels 
291*25202Skarels /*
292*25202Skarels  * Find the first empty spot in rcv buffer
293*25202Skarels  */
firstempty(tp)294*25202Skarels sequence firstempty(tp)
295*25202Skarels register struct tcpcb *tp;
296*25202Skarels {
297*25202Skarels     sequence	retval;
298*25202Skarels 
299*25202Skarels     FIRSTEMPTY(tp, retval);
300*25202Skarels     return(retval);
301*25202Skarels }
302*25202Skarels 
303*25202Skarels 
304*25202Skarels /*
305*25202Skarels  * TCP timer update routine
306*25202Skarels  */
tcp_timeo()307*25202Skarels tcp_timeo()
308*25202Skarels {
309*25202Skarels     register struct inpcb *inp, *next;
310*25202Skarels     register struct tcpcb *tp;
311*25202Skarels     register i;
312*25202Skarels     register s;
313*25202Skarels     extern sequence tcp_iss;	/* tcp initial send seq # */
314*25202Skarels     static int tcpmins;	/* tcp minute timer */
315*25202Skarels 
316*25202Skarels     /* search through tcb and update active timers */
317*25202Skarels     s = splnet();
318*25202Skarels     inp = tcp.inp_next;
319*25202Skarels     while (inp != &tcp)
320*25202Skarels     {
321*25202Skarels 	next = inp->inp_next;
322*25202Skarels 	if (tp = inptotcpcb(inp))
323*25202Skarels 	{
324*25202Skarels 	    if (tp->sws_qff < SWS_QFF_DEF)
325*25202Skarels 		tp->sws_qff ++;
326*25202Skarels 
327*25202Skarels 	    for (i = TINIT; i <= TDELACK; i++)
328*25202Skarels 		if (tp->t_timers[i] != 0 && --tp->t_timers[i] == 0)
329*25202Skarels 		{
330*25202Skarels 		    struct work w;
331*25202Skarels 
332*25202Skarels 		    w.w_type = ISTIMER;
333*25202Skarels 		    w.w_stype = i;
334*25202Skarels 		    w.w_tcb = tp;
335*25202Skarels 		    w.w_dat = (char *) NULL;
336*25202Skarels 		    if (action(&w) == CLOSED)
337*25202Skarels 			goto next_tcb;
338*25202Skarels 		}
339*25202Skarels 
340*25202Skarels 	    if (tp->t_timers[TXMT] < MAX_TCPTIMERVAL-1)
341*25202Skarels 		tp->t_timers[TXMT]++;
342*25202Skarels 
343*25202Skarels 	    if (tcpmins == 0)
344*25202Skarels 	    {
345*25202Skarels 		if (tp->t_timers[TNOACT] != 0 && --tp->t_timers[TNOACT] == 0)
346*25202Skarels 		    w_alloc(ISTIMER, TNOACT, tp, 0);
347*25202Skarels 	    }
348*25202Skarels 	}
349*25202Skarels next_tcb:
350*25202Skarels 	inp = next;
351*25202Skarels     }
352*25202Skarels     splx(s);
353*25202Skarels 
354*25202Skarels     if (--tcpmins < 0)
355*25202Skarels 	tcpmins = 120-1;	/* zero-origin strikes again */
356*25202Skarels     tcp_iss += ISSINCR;	/* increment iss */
357*25202Skarels }
358*25202Skarels 
359*25202Skarels 
360*25202Skarels /*
361*25202Skarels  * Do TCP option processing
362*25202Skarels  */
tcp_opt(tp,t,hlen)363*25202Skarels tcp_opt(tp, t, hlen)
364*25202Skarels register struct tcpcb *tp;
365*25202Skarels register struct th *t;
366*25202Skarels int hlen;
367*25202Skarels {
368*25202Skarels     register char *p;
369*25202Skarels     register i, j, len;
370*25202Skarels 
371*25202Skarels     p = (char *)((int)t + sizeof(struct th));	/* -> at options */
372*25202Skarels 
373*25202Skarels     if ((i = hlen - TCPSIZE) > 0)
374*25202Skarels     {			/* any options */
375*25202Skarels 
376*25202Skarels 	while (i > 0)
377*25202Skarels 
378*25202Skarels 	    switch (*p++)
379*25202Skarels 	{
380*25202Skarels 	  case TCP_END_OPT:
381*25202Skarels 	  default:	/* garbage: throw away rest */
382*25202Skarels 	    return;
383*25202Skarels 
384*25202Skarels 	  case TCP_NOP_OPT:
385*25202Skarels 	    i--;
386*25202Skarels 	    break;
387*25202Skarels 
388*25202Skarels 	  case TCP_MAXSEG_OPT:	/* max segment size */
389*25202Skarels 	    if (t->t_flags&T_SYN && !tp->syn_rcvd)
390*25202Skarels 	    {
391*25202Skarels 		len = ntohs(*(u_short *)((int)p + 1));
392*25202Skarels 		/*
393*25202Skarels 		 * may not have a route yet
394*25202Skarels 		 */
395*25202Skarels 		if (!tp->t_in_pcb->inp_route.ro_rt)
396*25202Skarels 		    /* in LISTEN state */
397*25202Skarels 		    tp->t_maxseg = len;
398*25202Skarels 		else
399*25202Skarels 		    /* connecting to server */
400*25202Skarels 		    tp->t_maxseg =
401*25202Skarels 		    MIN(tp->t_in_pcb->inp_route.ro_rt->rt_ifp->if_mtu -
402*25202Skarels 		    TCPIPMAX, len);
403*25202Skarels 		tp->t_maxseg -= tp->t_in_pcb->inp_optlen;
404*25202Skarels 	    }
405*25202Skarels 	    if ((j = *p) == 0)
406*25202Skarels 		break;
407*25202Skarels 	    i -= j;
408*25202Skarels 	    p += j - 1;
409*25202Skarels 	}
410*25202Skarels     }
411*25202Skarels }
412*25202Skarels 
413*25202Skarels /*
414*25202Skarels  * Called at splimp from uipc_mbuf.c
415*25202Skarels  * Network needs some space freed!  Remove unprocessed packets.
416*25202Skarels  */
tcp_drain()417*25202Skarels tcp_drain()
418*25202Skarels {
419*25202Skarels     register struct inpcb *inp;
420*25202Skarels     register struct tcpcb *tp;
421*25202Skarels     register struct mbuf *m;
422*25202Skarels 
423*25202Skarels     for (inp = tcp.inp_next; inp != &tcp; inp = inp->inp_next)
424*25202Skarels     {
425*25202Skarels 	tp = (struct tcpcb *)inp->inp_ppcb;
426*25202Skarels 
427*25202Skarels 	if (tp == NULL)
428*25202Skarels 	    continue;
429*25202Skarels 
430*25202Skarels 	while (m = tp->t_rcv_unack)
431*25202Skarels 	{
432*25202Skarels 	    tp->t_rcv_unack = m->m_act;
433*25202Skarels 	    m->m_act = (struct mbuf *)NULL;
434*25202Skarels 	    m_freem (m);
435*25202Skarels 	}
436*25202Skarels     }
437*25202Skarels }
438