#ifdef RCSIDENT
static char rcsident[] = "$Header: tcp_usrreq.c,v 1.30 85/07/31 09:43:43 walsh Exp $";
#endif /* RCSIDENT */

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/mbuf.h"
#include "../h/socket.h"
#include "../h/socketvar.h"
#include "../h/protosw.h"
#include "../h/errno.h"
#include "../h/ioctl.h"
#include "../h/time.h"
#include "../h/kernel.h"

#include "../net/if.h"
#include "../net/route.h"

#include "../bbnnet/in.h"
#include "../bbnnet/in_var.h"
#include "../bbnnet/in_pcb.h"
#include "../bbnnet/net.h"
#include "../bbnnet/fsm.h"
#include "../bbnnet/tcp.h"
#include "../bbnnet/ip.h"
#include "../bbnnet/icmp.h"
#include "../bbnnet/macros.h"
#include "../bbnnet/sws.h"

/*
 * TCP protocol interface to socket abstraction.
 */

#ifdef GPROF
int tcp_acounts[TCP_NSTATES][PRU_NREQ];
#endif

extern tcp_pcbdisconnect();
extern tcp_binding_used();

struct inpcb tcp;
struct tcp_stat tcpstat;
sequence tcp_iss;                       /* tcp initial send seq # */

struct dfilter tcp_dfilter;

struct pr_advice tcp_advice =
{
    TCP_RESERVED,                       /* application reserved */
    TCP_USERRESERVED,                   /* user reserved */
    TCP_MAXPORT,                        /* max port */
    TCP_USERRESERVED+1,                 /* random last used */
    sizeof(u_short),                    /* port size */
    tcp_binding_used,                   /* confirmation routine */
} ;
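
/*
 * Illustrative sketch only (the real allocation loop lives in in_pcbbind(),
 * and the pr_advice field names used here are assumed): given the advice
 * above, an ephemeral port would be picked roughly as
 *
 *      port = last_used;
 *      do {
 *              if (++port > TCP_MAXPORT)
 *                      port = TCP_USERRESERVED + 1;
 *      } while (tcp_binding_used(inp, port, lsaddr, 0));
 *
 * so ports at or below TCP_USERRESERVED are never handed out automatically,
 * and tcp_binding_used() (below) vetoes any candidate already in use.
 */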

dowedebug(inp, so, filter)
    register struct inpcb *inp;
    struct socket *so;
    register struct dfilter *filter;
{
    register int count;

    count = 0;
    if (inp->inp_faddr.s_addr == filter->foreign_host.s_addr)
        count ++;
    if (inp->inp_fport == filter->foreign_port)
        count ++;
    if (inp->inp_laddr.s_addr == filter->local_host.s_addr)
        count ++;
    if (inp->inp_lport == filter->local_port)
        count ++;

    if (count >= filter->matches)
        so->so_options |= SO_DEBUG;
}
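
/*
 * Usage sketch (illustrative, not code that is compiled here): to turn on
 * SO_DEBUG tracing for every connection made to one particular peer without
 * touching the applications, the filter can be patched at run time, e.g.
 * from a kernel debugger:
 *
 *      tcp_dfilter.foreign_host.s_addr = <peer address, network order>;
 *      tcp_dfilter.matches = 1;
 *
 * Any connection that matches at least "matches" of the four fields then
 * gets SO_DEBUG set by dowedebug() when the connection is initiated (see
 * the PRU_CONNECT case in tcp_usrreq() below).
 */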

int tcp_noact = 0;                      /* patchable */

/*
 * Allocate and initialize a new TCB.
 * tcp_usrreq() calls tcp_attach(), which calls us; tcp_usrreq() has
 * already done the splnet().
 */
struct tcpcb *tcp_newtcpcb(inp)
    register struct inpcb *inp;
{
    register struct tcpcb *tp;
    register struct mbuf *m;

    m = m_getclr(M_WAIT, MT_PCB);
    if (m == NULL)
        return(NULL);
    tp = mtod(m, struct tcpcb *);

    /* initialize non-zero tcb fields */

    tp->t_rcv_next = (struct th *)tp;
    tp->t_rcv_prev = (struct th *)tp;
    /*
     * Don't start off assuming the minimum srtt/rxmitime.  If we do, and
     * TCP_tvRXMIN is small, and we end up communicating over a reliable
     * but slow network, then we may never find the true values for these:
     * we may assume an ACK was for a retransmission whose srtt we're
     * measuring, not for the original packet.
     *
     * Instead, start high and approach from above in a deterministic
     * fashion.  We should get close to the right values fairly rapidly.
     *
     * 7/85: start from above by special casing the first round trip time
     * measurement.  If srtt == 0, do not reset rtt, and do not use
     * weighted averaging.  srtt starts as the time to ack (xmit [+ rxmit...])
     * and then gets smoothed with new round trip times.  This compromise
     * for getting to the long-term srtt more quickly on LANs should work
     * on the Internet as well.  It will only hurt Internet connections
     * if packet loss is high, and even then would only slow getting
     * to the long-term srtt.
     * This method can be turned off by initializing srtt with a non-zero
     * value.
     */
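    /*
     * Illustrative numbers only (the actual smoothing is done in the fsm
     * code, and the gain used there may differ): with a conventional
     * update of the form
     *
     *      srtt = srtt + gain * (rtt - srtt)       where 0 < gain < 1
     *
     * an initial guess that is ten times too large takes many samples to
     * decay, whereas taking the very first measured round trip directly
     * as srtt (the srtt == 0 special case above) gets within range at
     * once and only then switches to weighted averaging.
     */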
    /* tp->t_srtt = TCP_tvMAXSRTT; */
    tp->t_rxmitime = TCP_tvMAXSRTT + 1;
    tp->t_rttltimeo = TCP_tvRTTL;
    tp->t_xmt_val = tp->snd_end = tp->seq_fin = tp->snd_nxt =
        tp->snd_hi = tp->snd_una = tp->iss = tcp_iss;
    tcp_iss += ISSINCR;

    /*
     * Imitate Berkeley code by setting push as a default.  This should
     * increase compatibility at the user code level.
     */
    tp->t_push = TRUE;

    /*
     * Berkeley 4.2 code sends a data byte beyond the window's edge to see
     * if the other end is up.  If the other end does not respond, the
     * connection times out and aborts.  This is dangerous since the byte
     * may make its way into the input stream if the recipient's TCP was
     * coded with packet cost in mind.
     *
     * We'll provide an optional method that sends a well formed ack which
     * will catch remote failure and generate a tcp reset.  Note that we
     * don't care if the other end ignores the ack; we only hope for a well
     * coded tcp to respond with a reset in the right circumstances.  This
     * sort of handshaking/probing should really be done at the application
     * level, but not all specs (e.g., SMTP) provide for such a noop.
     *
     * Optional, since some networks charge for packets and since some might
     * see this as unnecessary traffic.
     *
     * Also see tcp_ioctl().
     */
    if (tp->t_noact = tcp_noact)
        tp->t_noactprobe = TRUE;

    /* attach the tcpcb to the in_pcb */

    inp->inp_ppcb = (caddr_t)tp;
    tp->t_in_pcb = inp;

    return(tp);
}

/*
 * Is a tcp port/address pair already in use by some socket on this machine?
 * Passed to in_pcbbind() to help it find a port/address binding
 * that is unique for tcp.
 */
int tcp_binding_used(inp, lport, lsaddr, reuselocal)
    struct inpcb *inp;
    u_short lport;
    u_long lsaddr;
    int reuselocal;
{
    register struct inpcb *i;

    for (i = tcp.inp_next; i != &tcp; i = i->inp_next)
    {
        /*
         * Since our inpcb is in this linked list, don't want to know
         * if we, ourselves, are already using this binding.
         */
        if (i != inp)
            if (i->inp_lport == lport)
                /*
                 * Our/His address is unbound (INADDR_ANY) iff
                 * not yet connected to foreign host.
                 */
                if ((i->inp_laddr.s_addr == lsaddr) ||
                    (i->inp_laddr.s_addr == INADDR_ANY) ||
                    (lsaddr == INADDR_ANY))
                {
                    if (!reuselocal)
                        break;
                    if (i->inp_faddr.s_addr == INADDR_ANY)
                        /*
                         * We're both waiting for foreign
                         * connection.  Could only re-use if
                         * he was already connected.
                         */
                        break;
                }
    }
    return (i != &tcp);
}
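
/*
 * Worked example of the rules above (illustrative; the port number is just
 * for concreteness): a server bound to local port 25 with the wildcard
 * local address blocks a second bind of port 25 when reuselocal is off,
 * since the loop finds a matching local port and a wildcard address.
 * With reuselocal on, the bind is refused only while some matching pcb
 * still has no foreign peer (i.e. is itself listening); pcbs already
 * connected to a peer do not prevent re-use of the local binding.
 */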

/*
 * returns a (struct tcpcb *) cast to a (char *).  This is
 * so in_pcbconnect() can correctly handle return value.  All
 * other uses promptly cast back.
 */

char *tcp_conn_used(inp, lport, lsaddr, fport, fsaddr)
    struct inpcb *inp;
    u_short lport;
    u_long lsaddr;
    u_short fport;
    u_long fsaddr;
{
    register struct inpcb *i;

    for (i = tcp.inp_next; i != &tcp; i = i->inp_next)
    {
        /*
         * Since our inpcb is in this linked list, don't want to know
         * if we, ourselves, are already using this connection.
         */
        if (i != inp)
            if ((i->inp_lport == lport) &&
                (i->inp_fport == fport) &&
                (i->inp_laddr.s_addr == lsaddr) &&
                (i->inp_faddr.s_addr == fsaddr))
                return((char *)i->inp_ppcb);
    }
    return ((char *) NULL);
}

tcp_ioctl (tp, command, data)
    struct tcpcb *tp;
    int command;
    caddr_t data;
{
    switch (command)
    {
        /* push */
        case SIOCSPUSH:
            tp->t_push = TRUE;
            break;

        case SIOCCPUSH:
            tp->t_push = FALSE;
            break;

        /* no activity timer */
        case SIOCSNOACT:
        {
            u_long value;

            value = *((u_long *) data);
            /*
             * A shutdown socket should still be able to request some sort of
             * check on the status of the remote end.  Also see tcp_newtcpcb().
             */
            tp->t_noactprobe = (value & TCP_NOACTPROBE) ? TRUE : FALSE;
            tp->t_noactsig = (value & TCP_NOACTSIG) ? TRUE : FALSE;

            if ((tp->t_state <= ESTAB) || (tp->t_state == CLOSE_WAIT))
            {
                /* don't interfere with system use of timer */
                value &= ~(TCP_NOACTPROBE|TCP_NOACTSIG);
                tp->t_noact = MIN (MAX_TCPTIMERVAL, value);
                tp->t_timers[TNOACT] = tp->t_noact;
            }
        }
        break;

        case SIOCGNOACT:
        {
            u_long value;

            value = tp->t_noact;
            if (tp->t_noactprobe)
                value |= TCP_NOACTPROBE;
            if (tp->t_noactsig)
                value |= TCP_NOACTSIG;

            *((u_long *) data) = value;
        }
        break;

        /* init timer */
        case SIOCSINIT:
            tp->t_itimeo = MIN (MAX_TCPTIMERVAL, *((unsigned *) data));
            break;

        case SIOCGINIT:
            *((int *) data) = tp->t_itimeo;
            break;

        /* retransmit took too long timer */
        case SIOCSRTTL:
            tp->t_rttltimeo = MIN (MAX_TCPTIMERVAL, *((unsigned *) data));
            break;

        case SIOCGRTTL:
            *((int *) data) = tp->t_rttltimeo;
            break;

        case SIOCABORT:
        {
            struct socket *so;

            /* there really should be a generic way for
             * a user to get to soabort()
             */

            tp->usr_abort = TRUE;
            /*
             * Just in case we are asked to abort a LISTENing socket,
             * don't leave unattached, unaccepted connections behind.
             */
            so = tp->t_in_pcb->inp_socket;
            while (so->so_q0 && (so->so_q0 != so))
                (void) soabort(so->so_q0);
            while (so->so_q && (so->so_q != so))
                (void) soabort(so->so_q);

            w_alloc(IUABORT, 0, tp, tp->t_in_pcb);
        }
        break;

        default:
            /* not our ioctl, let lower level try ioctl */
            return ip_ioctl (tp->t_in_pcb, command, data);
    }

    return (0);
}
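
/*
 * User-level usage sketch (illustrative only; it assumes the SIOC* and
 * TCP_NOACT* definitions above are visible to applications through the
 * usual ioctl headers, and that "s" is a connected TCP socket descriptor):
 *
 *      u_long val = 600 | TCP_NOACTPROBE;      600 timer units, then probe
 *
 *      if (ioctl(s, SIOCSNOACT, (char *)&val) < 0)
 *              perror("SIOCSNOACT");
 *
 * This arms the no-activity timer so that an idle connection is probed
 * with a well formed ack (see the comment in tcp_newtcpcb()) instead of
 * being trusted silently.
 */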

/*
 * Process a TCP user request for TCP tb.  If this is a send request
 * then m is the mbuf chain of send data.  If this is a timer expiration
 * (PRU_SLOWTIMO, called from the software clock routine), all of the
 * TCP timers are run down via tcp_timeo().
 */
/*ARGSUSED*/
tcp_usrreq(so, req, m, nam, rights)
    struct socket *so;
    int req;
    struct mbuf *m, *nam, *rights;
{
    register struct inpcb *inp;
    register struct tcpcb *tp;
    register int s;
    register int act, newstate;
    int error = 0;

    s = splnet();
    inp = sotoinpcb(so);

    /* keep in mind call from ifioctl() */
    if (rights && req != PRU_CONTROL)
    {
        if (rights->m_len)
        {
            splx(s);
            return (EINVAL);
        }
    }
    /*
     * When a TCP is attached to a socket, then there will be
     * a (struct inpcb) pointed at by the socket, and this
     * structure will point at a subsidiary (struct tcpcb).
     */
    if (inp == NULL && req != PRU_ATTACH)
    {
        splx(s);
        return (EINVAL);                /* XXX */
    }
    if (inp)
    {
        tp = inptotcpcb(inp);
        /* WHAT IF TP IS 0? */
#ifdef GPROF
        tcp_acounts[tp->t_state][req]++;
#endif
    }

    /*
     * This switch becomes a 'caseb', so put common ones at top.
     */
    switch (req)
    {

        case PRU_RCVD:
            /*
             * After a receive, possibly send window update to peer.
             */
            W_ALLOC(IURECV, 0, tp, NULL, so, act, newstate);
            break;

        case PRU_SEND:
            /*
             * Do a send by initiating the proper entry to the FSM.
             * Don't let urgent continue.
             */
            tp->t_urg = FALSE;
            W_ALLOC(IUSEND, 0, tp, m, so, act, newstate);
            break;

        /*
         * TCP attaches to socket via PRU_ATTACH, reserving space,
         * and an internet control block.
         */
        case PRU_ATTACH:
            if (inp)
            {
                error = EISCONN;
                break;
            }
            error = tcp_attach(so);
            if (error)
                break;
            if ((so->so_options & SO_LINGER) && so->so_linger == 0)
                so->so_linger = T_LINGERTIME;
            tp = sototcpcb(so);
            break;

        /*
         * PRU_DETACH detaches the TCP protocol from the socket.
         * This is only done after SO_ISCONNECTED has been cleared.
         */
        case PRU_DETACH:
            tcp_disconnect(tp);
            break;

        /*
         * Give the socket an address.
         */
        case PRU_BIND:
            error = in_pcbbind(inp, nam, &tcp_advice);
            break;

        /*
         * Prepare to accept connections.
         */
        case PRU_LISTEN:
            if (inp->inp_lport == 0)
                error = in_pcbbind(inp, (struct mbuf *)0, &tcp_advice);
            if (error == 0)
                w_alloc(IUOPENA, 0, tp, NULL);
            break;

        /*
         * Initiate connection to peer.
         * Bind the local end if not already.
         * Set the routing.
         * Crank up the TCP state machine.
         */
        case PRU_CONNECT:
        {
            struct in_addr laddr;

            laddr = inp->inp_laddr;
            if (inp->inp_lport == 0)
            {
                error = in_pcbbind(inp, (struct mbuf *)0, &tcp_advice);
                if (error)
                    break;
            }
            error = in_pcbconnect(inp, nam, tcp_conn_used);
            if (error)
                break;

            if (in_broadcast(inp->inp_faddr))
            {
                in_pcbdisconnect (inp, tcp_pcbdisconnect);
                inp->inp_laddr = laddr;
                error = EADDRNOTAVAIL;
                break;
            }

            if (! (tp->t_template = tcp_template(tp)))
            {
                in_pcbdisconnect (inp, tcp_pcbdisconnect);
                inp->inp_laddr = laddr;
                error = ENOBUFS;
                break;
            }

            tp->sws_qff = SWS_QFF_DEF;

            /*
             * So we can debug connection problems without having to change
             * every program or apply a debugging flag to each program every
             * time it is run.
             */
            dowedebug(inp, so, &tcp_dfilter);

            soisconnecting(so);
            w_alloc(IUOPENR, 0, tp, NULL);
        }
        break;

        /*
         * Create a TCP connection between two sockets.
         */
        case PRU_CONNECT2:
            error = EOPNOTSUPP;
            break;

        /*
         * Initiate disconnect from peer.
         * If the connection never passed the embryonic stage, just drop it;
         * else, if we don't need to let data drain, we can still just drop;
         * otherwise we have to begin the TCP shutdown process: mark the
         * socket disconnecting, drain unread data, switch state to reflect
         * the user close, and send a segment (e.g. FIN) to the peer.  The
         * socket will be really disconnected when the peer sends its FIN
         * and acks ours.
         */
        case PRU_DISCONNECT:
            tcp_disconnect(tp);
            break;

        /*
         * Accept a connection.  Essentially all the work is
         * done at higher levels; just return the address
         * of the peer, storing through addr.
         *
         * BBN-NOTE: upper levels do all the waiting; this stays the same.
         */
        case PRU_ACCEPT:
        {
            struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);

            nam->m_len = sizeof (struct sockaddr_in);
            sin->sin_family = AF_INET;
            sin->sin_port = inp->inp_fport;
            sin->sin_addr = inp->inp_faddr;
            break;
        }

        /*
         * Mark the connection as being incapable of further output.
         */
        case PRU_SHUTDOWN:
            socantsendmore(so);
            if (! tp->usr_closed)
                w_alloc(IUCLOSE, 0, tp, inp);
            break;

        /*
         * Abort the TCP.
         */
        case PRU_ABORT:
            w_alloc(IUABORT, 0, tp, inp);
            break;

        case PRU_CONTROL:
            error = tcp_ioctl(tp, (int) m, (caddr_t) nam);
            break;

        /* SOME AS YET UNIMPLEMENTED HOOKS */
        case PRU_SENSE:
            error = EOPNOTSUPP;
            break;
        /* END UNIMPLEMENTED HOOKS */

        case PRU_RCVOOB:
        {
            int desired;

            if (so->so_oobmark == 0 && (so->so_state & SS_RCVATMARK) == 0)
            {
                error = EINVAL;
                break;
            }
            if (tp->oob_data == NULL)
            {
                error = EWOULDBLOCK;
                break;
            }
            desired = *(mtod(m, int *));

            while ((desired > 0) && (tp->oob_data))
            {
                char *p;
                unsigned count;

                p = mtod(m, caddr_t);
                count = MIN(desired, tp->oob_data->m_len);
                count = MIN(count, MLEN);
                bcopy(mtod(tp->oob_data, caddr_t), p, count);
                m->m_len = count;
                desired -= count;

                tp->oob_data->m_len -= count;
                tp->oob_data->m_off += count;
                if (tp->oob_data->m_len <= 0)
                    tp->oob_data = m_free(tp->oob_data);

                if ((desired > 0) && (tp->oob_data))
                {
                    m->m_next = m_get(M_WAIT, MT_DATA);
                    m = m->m_next;
                }
            }
        }
        break;

        case PRU_SENDOOB:
            /*
             * allows up to MAX_TCPOOB bytes of out of band data
             * even if user has used up all his allocated space.
             */
            if (sbspace(&so->so_snd) < (- MAX_TCPOOB))
            {
                m_freem(m);
                error = ENOBUFS;
                break;
            }
            tp->t_urg = TRUE;
            w_alloc(IUSEND, 0, tp, m);
            break;

        /*
         * Return the address of this socket (local-side binding)
         */
        case PRU_SOCKADDR:
            in_setsockaddr(inp, nam);
            break;

        case PRU_PEERADDR:
            in_setpeeraddr(inp, nam);
            break;

        /*
         * TCP slow timer went off; run down all those timers.
         */
        case PRU_SLOWTIMO:
            tcp_timeo();
            break;

        default:
            panic("tcp_usrreq");
    }
    splx(s);
    return (error);
}
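
/*
 * For reference: applications do not call tcp_usrreq() directly.  The
 * socket layer dispatches through the TCP entry in the inet protosw
 * (set up in this stack's in_proto.c), whose pr_usrreq field points here,
 * pr_ctloutput at tcp_ctloutput(), pr_ctlinput at tcp_ctlinput() and
 * pr_init at tcp_init().  A sketch of that wiring, using the field layout
 * of a 4.2BSD-style protosw (the actual initializer may differ):
 *
 *      { SOCK_STREAM, AF_INET, IPPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD,
 *        tcp_input, 0, tcp_ctlinput, tcp_ctloutput,
 *        tcp_usrreq,
 *        tcp_init, ... },
 *
 * The timer/drain slots are elided above; in this stack the slow-timer
 * work ultimately arrives here as a PRU_SLOWTIMO request, and all
 * requests are handled at splnet.
 */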

/*
 * getsockopt() / setsockopt()
 */
tcp_ctloutput (req, so, level, optname, optval)
    int req;
    struct socket *so;
    int level, optname;
    struct mbuf **optval;
{
    int s = splnet();           /* like PRU/packet/timer entry into net code */
    int error;
    struct inpcb *inp;

    /*
     * possibly for us?
     * Follow Berkeley methods: level is protocol number if meant for the
     * protocol layer.  (Why not say if=0, arp=1, ip=2, udp/tcp/rdp=3....?)
     *
     * Problem: tcp needs to know about IP options in order to use right
     * maxseg.  This doesn't quite work with the layering.
     *
     * Why not combine ioctl/setsockopt/getsockopt paths, since ioctl can be
     * seen as fixed size sockopt- tried at BBN; removed for 4.3
     */

    /* should be "mature" socket so pointers all valid... */
    inp = sotoinpcb(so);

    switch (req)
    {
        case PRCO_GETOPT:
            error = tcp_getopt (inp, optname, optval);
            break;

        case PRCO_SETOPT:
            error = tcp_setopt (inp, optname, optval);
            break;

        default:
            panic("tcp_ctloutput");
    }

    splx(s);
    return (error);
}

tcp_getopt (inp, command, data)
    struct inpcb *inp;
    int command;
    struct mbuf **data;
{
    /*
     * no TCP specific options accessed by getsockopt() as yet.
     * let lower level at cmd
     */
    return ip_getopt (inp, command, data);
}

tcp_setopt (inp, command, data)
    struct inpcb *inp;
    int command;
    struct mbuf **data;
{
    int error;
    struct tcpcb *tp;

    /* no TCP specific options accessed by setsockopt() as yet */
    tp = inptotcpcb(inp);

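    /*
     * Interpretation of the code below: t_maxseg is kept as the
     * option-free maximum segment size minus the length of any IP
     * options in use on the connection.  Add the current option length
     * back before ip_setopt() possibly replaces the options (e.g. a new
     * source route), then subtract the new length afterwards, so a
     * change in option size is reflected in how large a segment we will
     * build.
     */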
    if (command == SO_IPROUTE)
        tp->t_maxseg += inp->inp_optlen;

    error = ip_setopt(inp, command, data);

    if (command == SO_IPROUTE)
        tp->t_maxseg -= inp->inp_optlen;

    return (error);
}

/*
 * These numbers come from measurements described in the paper
 * "Converting the BBN TCP/IP to 4.2BSD" (S.L.C. USENIX)
 * If your network handles packets larger than an ethernet frame, you
 * could change tcp_init back to determine the largest net's packet size,
 * multiply that by some number, and round up to a multiple of a CLSIZE.
 */
int tcp_recvspace = 4096;
int tcp_sendspace = 4096;

/*
 * Attach TCP protocol to socket, allocating
 * internet protocol control block, tcp control block, buffer space.
 */
tcp_attach(so)
    struct socket *so;
{
    register struct tcpcb *tp;
    struct inpcb *inp;
    int error;

    if (! (error = soreserve(so, tcp_sendspace, tcp_recvspace)))
    {
        if (! (error = in_pcballoc(so, &tcp)))
        {
            inp = sotoinpcb(so);
            if (tp = tcp_newtcpcb(inp))
            {
                /*
                 * Should change state tables to have an UNOPENED state like
                 * the butterfly's which is different from SAME.
                 */
                tp->t_state = 0;
                return (0);
            }
            error = ENOBUFS;
            in_pcbdetach(inp, (int (*)())0);
        }
    }
    return (error);
}

/*
 * Initiate (or continue) disconnect.
 * If embryonic state, just send reset (once).
 * If not in ``let data drain'' option, just drop.
 * Otherwise (hard), mark socket disconnecting and drop
 * current input data; switch states based on user close, and
 * send segment to peer (with FIN).
 */

tcp_disconnect(tp)
    register struct tcpcb *tp;
{
    struct socket *so = tp->t_in_pcb->inp_socket;

    soisdisconnecting(so);
    sbflush(&so->so_rcv);
    tp->usr_abort = TRUE;
    if (!tp->usr_closed)
        w_alloc(IUCLOSE, 0, tp, tp->t_in_pcb);
}

tcp_init()
{
    /*
     * Leave these checks in!  It's a pain in the ass to find out
     * problems caused by too small mbufs if someone changes the
     * size of an mbuf.
     */
    if (sizeof(struct inpcb) > MLEN)
        panic("inpcb too big");

    if (sizeof(struct socket) > MLEN)
        panic("socket too big");

    if (sizeof(struct th) > MLEN)
        panic("th too big");

    if (sizeof(struct tcpcb) > MLEN)
        panic("tcpcb too big");

    if (sizeof(struct t_debug) > MLEN)
        panic("t_debug too big");

    /* init queue */
    tcp.inp_next = tcp.inp_prev = &tcp;

    /* there are only 4 things to match, so requiring 5 turns the filter off for now */
    tcp_dfilter.matches = 5;

    tcp_iss = time.tv_sec;

    ipsw[IPPROTO_TCP].ipsw_hlen = sizeof(struct th);
}

tcp_ctlinput (prc_code, arg)
    int prc_code;
    caddr_t arg;
{
    int error;

    error = inetctlerrmap[prc_code];

    switch (prc_code)
    {
        case PRC_UNREACH_PROTOCOL:      /* icmp message */
        case PRC_UNREACH_PORT:
        case PRC_MSGSIZE:
        {
            register struct th *tp;
            struct tcpcb *t;

            tp = (struct th *) (&((struct icmp *) arg)->ic_iphdr);
            t = (struct tcpcb *)tcp_conn_used ((struct inpcb *) 0,
                tp->t_src, tp->t_s.s_addr,
                tp->t_dst, tp->t_d.s_addr);
            if (t)
                t_close(t, error);
        }
        break;

        case PRC_UNREACH_NET:
        case PRC_UNREACH_HOST:
        {
            register struct th *tp;
            struct tcpcb *t;

            tp = (struct th *) (&((struct icmp *) arg)->ic_iphdr);
            t = (struct tcpcb *)tcp_conn_used ((struct inpcb *) 0,
                tp->t_src, tp->t_s.s_addr,
                tp->t_dst, tp->t_d.s_addr);
            if (t)
            {
                struct socket *so;

                so = t->t_in_pcb->inp_socket;
                if ((so->so_state & SS_NOFDREF) == 0)
                    advise_user(so, error);
                else
                    t_close(t, error);
            }
        }
        break;

        case PRC_GWDOWN:
            in_gdown (&tcp, (u_long) arg);
            break;

        case PRC_REDIRECT_NET:          /* icmp message */
        case PRC_REDIRECT_HOST:
        {
            struct tcpcb *t;
            register struct th *tp;

            tp = (struct th *) (&((struct icmp *) arg)->ic_iphdr);
            t = (struct tcpcb *)tcp_conn_used ((struct inpcb *) 0,
                tp->t_src, tp->t_s.s_addr,
                tp->t_dst, tp->t_d.s_addr);
            if (t)
                icmp_redirect_inp(t->t_in_pcb, (struct icmp *) arg,
                    prc_code == PRC_REDIRECT_NET ? rtnet : rthost);
        }
        break;

        case PRC_TIMXCEED_INTRANS:      /* icmp message */
        case PRC_TIMXCEED_REASS:
        case PRC_PARAMPROB:
            break;

        case PRC_QUENCH:                /* icmp message */
            /*
             * See RFC 896.  The idea is, when we get a source quench message on
             * a connection we should send fewer packets.  This ties in with the
             * silly window syndrome whose solution is to send fewer, larger packets.
             * Deal with quenches by altering threshold used by silly window
             * syndrome.  This is similar to acting as if the window is smaller
             * than it actually is for deciding when to send, except that when we
             * do, we use as much as there really is.
             */
        {
            register struct th *tp;
            struct tcpcb *t;

            tp = (struct th *) (&((struct icmp *) arg)->ic_iphdr);
            t = (struct tcpcb *)tcp_conn_used ((struct inpcb *) 0,
                tp->t_src, tp->t_s.s_addr,
                tp->t_dst, tp->t_d.s_addr);
            if (t)
            {
                t->sws_qff -= SWS_QFF_DEC;
                if (t->sws_qff < SWS_QFF_MIN)
                    t->sws_qff = SWS_QFF_MIN;
            }
        }
        break;

        case PRC_IFDOWN:
        {
            u_long addr;

            addr = ((struct sockaddr_in *)(arg))->sin_addr.s_addr;
            inpcb_notify(&tcp, addr, (u_long) 0, error);
            inpcb_notify(&tcp, (u_long) 0, addr, error);
        }
        break;

        case PRC_HOSTDEAD:              /* from imp interface */
        case PRC_HOSTUNREACH:
            /*
             * get same message for destination hosts and gateways.
             */
        {
            u_long addr;

            addr = ((struct sockaddr_in *)arg)->sin_addr.s_addr;
            in_gdown (&tcp, addr);
            inpcb_notify(&tcp, (u_long) 0, addr, error);
        }
        break;

        default:
            panic("tcp_ctlinput");
    }
}