1*25202Skarels #ifdef RCSIDENT
2*25202Skarels static char rcsident[] = "$Header: tcp_prim.c,v 1.23 85/07/31 09:34:04 walsh Exp $";
3*25202Skarels #endif
4*25202Skarels
5*25202Skarels #include "../h/param.h"
6*25202Skarels #include "../h/errno.h"
7*25202Skarels #include "../h/mbuf.h"
8*25202Skarels #include "../h/socket.h"
9*25202Skarels #include "../h/socketvar.h"
10*25202Skarels #include "../h/protosw.h"
11*25202Skarels
12*25202Skarels #include "../net/if.h"
13*25202Skarels #include "../net/route.h"
14*25202Skarels
15*25202Skarels #include "../bbnnet/in.h"
16*25202Skarels #include "../bbnnet/in_pcb.h"
17*25202Skarels #include "../bbnnet/in_var.h"
18*25202Skarels #include "../bbnnet/net.h"
19*25202Skarels #include "../bbnnet/fsm.h"
20*25202Skarels #include "../bbnnet/tcp.h"
21*25202Skarels #include "../bbnnet/seq.h"
22*25202Skarels #include "../bbnnet/ip.h"
23*25202Skarels #include "../bbnnet/macros.h"
24*25202Skarels #include "../bbnnet/sws.h"
25*25202Skarels
26*25202Skarels extern struct inpcb tcp;
27*25202Skarels
28*25202Skarels /*
29*25202Skarels * TCP finite state machine primitives
30*25202Skarels *
31*25202Skarels * These routines are called from the procedures in tcp_procs.c to do low
32*25202Skarels * level protocol functions.
33*25202Skarels */
34*25202Skarels
35*25202Skarels /*
36*25202Skarels * The hope is that a bcopy will fill in most tcp/ip header fields quicker
37*25202Skarels * than a step by step stuffing of each individually when we have to send
38*25202Skarels * some info.
39*25202Skarels */
tcp_template(tp)40*25202Skarels struct th *tcp_template(tp)
41*25202Skarels struct tcpcb *tp;
42*25202Skarels {
43*25202Skarels register struct mbuf *m;
44*25202Skarels register struct th *t;
45*25202Skarels register struct inpcb *inp;
46*25202Skarels
47*25202Skarels m = m_getclr(M_WAIT, MT_HEADER);
48*25202Skarels if (m == NULL)
49*25202Skarels return ((struct th *) NULL);
50*25202Skarels m->m_len = sizeof (struct th);
51*25202Skarels t = mtod(m, struct th *);
52*25202Skarels inp = tp->t_in_pcb;
53*25202Skarels
54*25202Skarels t->t_pr = IPPROTO_TCP;
55*25202Skarels t->t_s = inp->inp_laddr;
56*25202Skarels t->t_d = inp->inp_faddr;
57*25202Skarels t->t_src = inp->inp_lport;
58*25202Skarels t->t_dst = inp->inp_fport;
59*25202Skarels t->t_off = TCPSIZE >> TCP_OFFSHIFT;
60*25202Skarels
61*25202Skarels return (t);
62*25202Skarels }
63*25202Skarels
64*25202Skarels #ifdef GPROF
65*25202Skarels long tcp_pkt_size[2*1024];
66*25202Skarels #endif
67*25202Skarels
68*25202Skarels /*
69*25202Skarels * Send a tcp segment
70*25202Skarels */
send_pkt(tp,flags,len)71*25202Skarels send_pkt(tp, flags, len)
72*25202Skarels register struct tcpcb *tp;
73*25202Skarels register int flags;
74*25202Skarels register int len; /* in sequence units: includes SYN, FIN */
75*25202Skarels {
76*25202Skarels register struct th *t;
77*25202Skarels register struct inpcb *inp;
78*25202Skarels register struct mbuf *m;
79*25202Skarels struct mbuf *dat;
80*25202Skarels int i;
81*25202Skarels struct sockbuf *sorcv;
82*25202Skarels short *p;
83*25202Skarels struct th *tmpt;
84*25202Skarels
85*25202Skarels inp = tp->t_in_pcb;
86*25202Skarels
87*25202Skarels /*
88*25202Skarels * Adjust data length for SYN and FIN.
89*25202Skarels */
90*25202Skarels if (flags & T_FIN)
91*25202Skarels len--;
92*25202Skarels if (flags & T_SYN)
93*25202Skarels len--;
94*25202Skarels
95*25202Skarels /*
96*25202Skarels * and get a copy of the data for this transmission
97*25202Skarels */
98*25202Skarels dat = (struct mbuf *) NULL;
99*25202Skarels if (len > 0)
100*25202Skarels {
101*25202Skarels int off;
102*25202Skarels
103*25202Skarels off = tp->snd_nxt - tp->snd_una;
104*25202Skarels if (! tp->syn_acked)
105*25202Skarels if (! (flags & T_SYN))
106*25202Skarels off --;
107*25202Skarels if ((dat = m_copy (inp->inp_socket->so_snd.sb_mb, off, len)) == NULL)
108*25202Skarels return (ENOBUFS);
109*25202Skarels }
110*25202Skarels
111*25202Skarels #ifdef MBUF_DEBUG
112*25202Skarels m = m_get(M_WAIT, MT_HEADER);
113*25202Skarels #else
114*25202Skarels MGET(m, M_WAIT, MT_HEADER);
115*25202Skarels #endif
116*25202Skarels if (m == NULL)
117*25202Skarels return(ENOBUFS);
118*25202Skarels /*
119*25202Skarels * Build tcp leader at bottom of new buffer to leave room for lower
120*25202Skarels * level leaders. Leave an extra four bytes for TCP max segment size
121*25202Skarels * option, which is sent in SYN packets.
122*25202Skarels * Align header for memory access speed in checksumming...
123*25202Skarels */
124*25202Skarels m->m_off = (MMAXOFF - sizeof(struct th) - TCP_MAXSEG_OPTLEN) &
125*25202Skarels ~(sizeof(long) -1);
126*25202Skarels m->m_len = sizeof(struct th);
127*25202Skarels m->m_next = dat;
128*25202Skarels t = mtod(m, struct th *);
129*25202Skarels
130*25202Skarels if (tp->t_template == NULL)
131*25202Skarels panic("send_pkt");
132*25202Skarels bcopy((caddr_t)tp->t_template, (caddr_t)t, sizeof (struct th));
133*25202Skarels
134*25202Skarels #ifndef NOTCPOPTS
135*25202Skarels /*
136*25202Skarels * Insert maximum segment size option for SYN.
137*25202Skarels */
138*25202Skarels if (flags & T_SYN)
139*25202Skarels {
140*25202Skarels /*
141*25202Skarels * may not have a route yet.
142*25202Skarels */
143*25202Skarels if (inp->inp_route.ro_rt)
144*25202Skarels {
145*25202Skarels m->m_len += TCP_MAXSEG_OPTLEN;
146*25202Skarels /* SYN occupies seq space */
147*25202Skarels len += TCP_MAXSEG_OPTLEN;
148*25202Skarels t->t_off = (TCPSIZE + TCP_MAXSEG_OPTLEN) >> TCP_OFFSHIFT;
149*25202Skarels p = (short *)((int)t + sizeof(struct th));
150*25202Skarels *p++ = htons((u_short)TCP_MAXSEG_OPTHDR);
151*25202Skarels *p = htons((u_short)inp->inp_route.ro_rt->rt_ifp->if_mtu
152*25202Skarels - TCPIPMAX);
153*25202Skarels }
154*25202Skarels }
155*25202Skarels #endif
156*25202Skarels
157*25202Skarels t->t_len = htons((u_short)len + TCPSIZE);
158*25202Skarels t->t_seq = htonl(tp->snd_nxt);
159*25202Skarels t->t_ackno = htonl(tp->rcv_nxt);
160*25202Skarels
161*25202Skarels #ifdef GPROF
162*25202Skarels if (len < (sizeof(tcp_pkt_size)/sizeof(tcp_pkt_size[0])) - TCPSIZE)
163*25202Skarels tcp_pkt_size[len+TCPSIZE] ++;
164*25202Skarels else
165*25202Skarels tcp_pkt_size[0] ++;
166*25202Skarels #endif
167*25202Skarels
168*25202Skarels if (tp->snd_rst)
169*25202Skarels {
170*25202Skarels flags |= T_RST;
171*25202Skarels flags &= ~T_SYN;
172*25202Skarels }
173*25202Skarels
174*25202Skarels if (tp->snd_urg)
175*25202Skarels {
176*25202Skarels if (SEQ_GT(tp->snd_urp, tp->snd_nxt))
177*25202Skarels {
178*25202Skarels short up;
179*25202Skarels
180*25202Skarels /*
181*25202Skarels * SEQ_LEQ(tp->snd_urp, tp->snd_nxt+len)
182*25202Skarels * Strictly speaking, we should be able to have the
183*25202Skarels * urgent pointer point into the stream beyond the
184*25202Skarels * current end of packet, but this is just in case
185*25202Skarels * some implementation is not ready for that.
186*25202Skarels */
187*25202Skarels flags |= T_URG;
188*25202Skarels up = MIN(len -1, tp->snd_urp - tp->snd_nxt -1);
189*25202Skarels t->t_urp = htons((u_short)up);
190*25202Skarels }
191*25202Skarels }
192*25202Skarels
193*25202Skarels if (tp->syn_rcvd)
194*25202Skarels {
195*25202Skarels #if T_DELACK > 0
196*25202Skarels tp->lastack = tp->rcv_nxt;
197*25202Skarels #endif
198*25202Skarels flags |= T_ACK;
199*25202Skarels }
200*25202Skarels
201*25202Skarels t->t_flags = flags;
202*25202Skarels
203*25202Skarels /* Calculate the window we should advertise */
204*25202Skarels
205*25202Skarels sorcv = &inp->inp_socket->so_rcv;
206*25202Skarels /*
207*25202Skarels * Count bytes left in user rcv queue, and reduce by sequencing queue
208*25202Skarels * Counting the sequencing q contracts the window when packets are
209*25202Skarels * lost (== when the network is over-loaded).
210*25202Skarels */
211*25202Skarels i = sbspace(sorcv) - tp->t_rcv_len;
212*25202Skarels /*
213*25202Skarels * apply receiver's solution to SWS in case sender does not have such
214*25202Skarels * an algorithm. One 8th was determined by benchmarks writing 2k
215*25202Skarels * buffers on an Ethernet connection. It has a slightly negative
216*25202Skarels * influence on IMP networks when writing 1k buffers.
217*25202Skarels *
218*25202Skarels * (sorcv->sb_hiwat >> 3) limited by 256 == 2k / 8, since application
219*25202Skarels * may adjust its buffering to large amounts via ioctl call. An eighth
220*25202Skarels * of a large number may be a reasonable sized packet to send.
221*25202Skarels *
222*25202Skarels * Only apply this algorithm if are getting packets in order,
223*25202Skarels * so that advertisement of 0 window does not prevent retransmission
224*25202Skarels * of dropped packet.
225*25202Skarels */
226*25202Skarels tmpt = tp->t_rcv_next;
227*25202Skarels if ((i < MIN(256, (sorcv->sb_hiwat >> 3))) &&
228*25202Skarels ((tmpt == (struct th *) tp) || SEQ_LEQ(tmpt->t_seq, tp->rcv_nxt)))
229*25202Skarels i = 0;
230*25202Skarels else
231*25202Skarels {
232*25202Skarels /*
233*25202Skarels * if this connection gets fragmented, constrain the windowsize
234*25202Skarels */
235*25202Skarels if (tp->t_maxfrag)
236*25202Skarels i = MIN(i, tp->t_maxfrag*3);
237*25202Skarels
238*25202Skarels if (i < 0)
239*25202Skarels i = 0;
240*25202Skarels }
241*25202Skarels
242*25202Skarels #if T_DELACK > 0
243*25202Skarels tp->rcv_wnd = i;
244*25202Skarels #endif
245*25202Skarels t->t_win = htons((u_short)i);
246*25202Skarels /*
247*25202Skarels * If we sent a zero window, we should try to send a non-zero ACK ASAP.
248*25202Skarels */
249*25202Skarels if (i == 0)
250*25202Skarels tp->sent_zero = TRUE;
251*25202Skarels else
252*25202Skarels tp->sent_zero = FALSE;
253*25202Skarels
254*25202Skarels t->t_sum = in_cksum(m, len + sizeof(struct th));
255*25202Skarels
256*25202Skarels if (inp->inp_socket->so_options & SO_DEBUG)
257*25202Skarels {
258*25202Skarels struct work w;
259*25202Skarels
260*25202Skarels w.w_type = INRECV; /* not really. use -1 newstate */
261*25202Skarels w.w_tcb = tp;
262*25202Skarels w.w_dat = (char *)t;
263*25202Skarels tcp_debug(tp, &w, -1);
264*25202Skarels }
265*25202Skarels
266*25202Skarels /*
267*25202Skarels * and ship packet off via IP. Remember that since this protocol
268*25202Skarels * involves retransmissions, errors can occur asynchronous to a
269*25202Skarels * (write) system call, and that therefore we can not send the
270*25202Skarels * error all the way back up through subroutine return values. We
271*25202Skarels * must also post it back via advise_user() at some point, and this
272*25202Skarels * looks like a good point to try it.
273*25202Skarels */
274*25202Skarels {
275*25202Skarels int error;
276*25202Skarels
277*25202Skarels error = ip_send(inp, m, len+TCPSIZE, FALSE);
278*25202Skarels if (error)
279*25202Skarels /*
280*25202Skarels * Since we use retransmissions, don't need to tell user
281*25202Skarels * process about this. (Can be as simple as interface
282*25202Skarels * or host structure queues are too long due to current
283*25202Skarels * heavy traffic. Backing off will take care of that.)
284*25202Skarels */
285*25202Skarels if (error != ENOBUFS)
286*25202Skarels advise_user(inp->inp_socket, error);
287*25202Skarels return (error);
288*25202Skarels }
289*25202Skarels }
290*25202Skarels
291*25202Skarels /*
292*25202Skarels * Find the first empty spot in rcv buffer
293*25202Skarels */
firstempty(tp)294*25202Skarels sequence firstempty(tp)
295*25202Skarels register struct tcpcb *tp;
296*25202Skarels {
297*25202Skarels sequence retval;
298*25202Skarels
299*25202Skarels FIRSTEMPTY(tp, retval);
300*25202Skarels return(retval);
301*25202Skarels }
302*25202Skarels
303*25202Skarels
304*25202Skarels /*
305*25202Skarels * TCP timer update routine
306*25202Skarels */
tcp_timeo()307*25202Skarels tcp_timeo()
308*25202Skarels {
309*25202Skarels register struct inpcb *inp, *next;
310*25202Skarels register struct tcpcb *tp;
311*25202Skarels register i;
312*25202Skarels register s;
313*25202Skarels extern sequence tcp_iss; /* tcp initial send seq # */
314*25202Skarels static int tcpmins; /* tcp minute timer */
315*25202Skarels
316*25202Skarels /* search through tcb and update active timers */
317*25202Skarels s = splnet();
318*25202Skarels inp = tcp.inp_next;
319*25202Skarels while (inp != &tcp)
320*25202Skarels {
321*25202Skarels next = inp->inp_next;
322*25202Skarels if (tp = inptotcpcb(inp))
323*25202Skarels {
324*25202Skarels if (tp->sws_qff < SWS_QFF_DEF)
325*25202Skarels tp->sws_qff ++;
326*25202Skarels
327*25202Skarels for (i = TINIT; i <= TDELACK; i++)
328*25202Skarels if (tp->t_timers[i] != 0 && --tp->t_timers[i] == 0)
329*25202Skarels {
330*25202Skarels struct work w;
331*25202Skarels
332*25202Skarels w.w_type = ISTIMER;
333*25202Skarels w.w_stype = i;
334*25202Skarels w.w_tcb = tp;
335*25202Skarels w.w_dat = (char *) NULL;
336*25202Skarels if (action(&w) == CLOSED)
337*25202Skarels goto next_tcb;
338*25202Skarels }
339*25202Skarels
340*25202Skarels if (tp->t_timers[TXMT] < MAX_TCPTIMERVAL-1)
341*25202Skarels tp->t_timers[TXMT]++;
342*25202Skarels
343*25202Skarels if (tcpmins == 0)
344*25202Skarels {
345*25202Skarels if (tp->t_timers[TNOACT] != 0 && --tp->t_timers[TNOACT] == 0)
346*25202Skarels w_alloc(ISTIMER, TNOACT, tp, 0);
347*25202Skarels }
348*25202Skarels }
349*25202Skarels next_tcb:
350*25202Skarels inp = next;
351*25202Skarels }
352*25202Skarels splx(s);
353*25202Skarels
354*25202Skarels if (--tcpmins < 0)
355*25202Skarels tcpmins = 120-1; /* zero-origin strikes again */
356*25202Skarels tcp_iss += ISSINCR; /* increment iss */
357*25202Skarels }
358*25202Skarels
359*25202Skarels
360*25202Skarels /*
361*25202Skarels * Do TCP option processing
362*25202Skarels */
tcp_opt(tp,t,hlen)363*25202Skarels tcp_opt(tp, t, hlen)
364*25202Skarels register struct tcpcb *tp;
365*25202Skarels register struct th *t;
366*25202Skarels int hlen;
367*25202Skarels {
368*25202Skarels register char *p;
369*25202Skarels register i, j, len;
370*25202Skarels
371*25202Skarels p = (char *)((int)t + sizeof(struct th)); /* -> at options */
372*25202Skarels
373*25202Skarels if ((i = hlen - TCPSIZE) > 0)
374*25202Skarels { /* any options */
375*25202Skarels
376*25202Skarels while (i > 0)
377*25202Skarels
378*25202Skarels switch (*p++)
379*25202Skarels {
380*25202Skarels case TCP_END_OPT:
381*25202Skarels default: /* garbage: throw away rest */
382*25202Skarels return;
383*25202Skarels
384*25202Skarels case TCP_NOP_OPT:
385*25202Skarels i--;
386*25202Skarels break;
387*25202Skarels
388*25202Skarels case TCP_MAXSEG_OPT: /* max segment size */
389*25202Skarels if (t->t_flags&T_SYN && !tp->syn_rcvd)
390*25202Skarels {
391*25202Skarels len = ntohs(*(u_short *)((int)p + 1));
392*25202Skarels /*
393*25202Skarels * may not have a route yet
394*25202Skarels */
395*25202Skarels if (!tp->t_in_pcb->inp_route.ro_rt)
396*25202Skarels /* in LISTEN state */
397*25202Skarels tp->t_maxseg = len;
398*25202Skarels else
399*25202Skarels /* connecting to server */
400*25202Skarels tp->t_maxseg =
401*25202Skarels MIN(tp->t_in_pcb->inp_route.ro_rt->rt_ifp->if_mtu -
402*25202Skarels TCPIPMAX, len);
403*25202Skarels tp->t_maxseg -= tp->t_in_pcb->inp_optlen;
404*25202Skarels }
405*25202Skarels if ((j = *p) == 0)
406*25202Skarels break;
407*25202Skarels i -= j;
408*25202Skarels p += j - 1;
409*25202Skarels }
410*25202Skarels }
411*25202Skarels }
412*25202Skarels
413*25202Skarels /*
414*25202Skarels * Called at splimp from uipc_mbuf.c
415*25202Skarels * Network needs some space freed! Remove unprocessed packets.
416*25202Skarels */
tcp_drain()417*25202Skarels tcp_drain()
418*25202Skarels {
419*25202Skarels register struct inpcb *inp;
420*25202Skarels register struct tcpcb *tp;
421*25202Skarels register struct mbuf *m;
422*25202Skarels
423*25202Skarels for (inp = tcp.inp_next; inp != &tcp; inp = inp->inp_next)
424*25202Skarels {
425*25202Skarels tp = (struct tcpcb *)inp->inp_ppcb;
426*25202Skarels
427*25202Skarels if (tp == NULL)
428*25202Skarels continue;
429*25202Skarels
430*25202Skarels while (m = tp->t_rcv_unack)
431*25202Skarels {
432*25202Skarels tp->t_rcv_unack = m->m_act;
433*25202Skarels m->m_act = (struct mbuf *)NULL;
434*25202Skarels m_freem (m);
435*25202Skarels }
436*25202Skarels }
437*25202Skarels }
438