125202Skarels #ifdef RCSIDENT
225202Skarels static char rcsident[] = "$Header: ip_output.c,v 1.28 85/07/31 09:32:09 walsh Exp $";
325202Skarels #endif
425202Skarels
525202Skarels #include "../h/param.h"
625202Skarels #include "../h/dir.h"
725202Skarels #include "../h/user.h"
825202Skarels #include "../h/mbuf.h"
925202Skarels #include "../h/socket.h"
1025202Skarels #include "../h/socketvar.h"
1125202Skarels #include "../h/protosw.h"
1225202Skarels #include "../h/domain.h"
1325202Skarels #include "../h/ioctl.h"
1425202Skarels #include "../h/syslog.h"
1525202Skarels
1625202Skarels #include "../net/if.h"
1725202Skarels #include "../net/route.h"
1825202Skarels
1925202Skarels #include "../bbnnet/in.h"
2025202Skarels #include "../bbnnet/net.h"
2125202Skarels #include "../bbnnet/in_pcb.h"
2225202Skarels #include "../bbnnet/in_var.h"
2325202Skarels #include "../bbnnet/ip.h"
2425202Skarels #include "../bbnnet/icmp.h"
2525202Skarels
/*
 * If you're going to a specific host or via a gateway, the routing
 * entry gateway field holds the best way to get there.  Otherwise,
 * the routing entry tells you how to get onto that net -- it has
 * the net address portion of our local host:
 *
 * On bbn-labs-b:
 *
 *		rt_dst		rt_gateway	flags
 *	il0  =>	0x00000b80	0x2010b80	UP
 *	imp0 =>	0x00000008	0x2000708	UP
 *	loop =>	0x0000007f	0x100007f	UP
 *
 * So you can see that the rt_gateway is our local address, and the
 * rt_dst may be the net number of the media.  If it's a route
 * to a net, the other guy is on this net and you want to route the
 * packet to him anyway.
 *
 *	gateway	0		0x1000b80	UP, RTF_GATEWAY
 */
4625202Skarels
/*
 * IF_SEND(ifp, mp, rt, retval)
 *
 * Hand the packet chain mp to interface ifp, using routing entry rt to
 * pick the link-level destination, and store the outcome in retval.
 *
 *  - Interface not IFF_UP: the chain is freed and retval = ENETUNREACH.
 *  - Route has RTF_GATEWAY or RTF_HOST set: the packet is sent to
 *    rt->rt_gateway.
 *  - Otherwise (route to a directly attached net): the packet is sent
 *    to the ip_dst found in the IP header at the front of mp.
 *
 * The body is wrapped in do { } while (0) so that "IF_SEND(...);"
 * behaves as a single statement, e.g. as the unbraced body of an
 * if/else.  Arguments are evaluated more than once, so they must be
 * free of side effects.  Each expansion gets its own static tmproute.
 */
#define IF_SEND(ifp, mp, rt, retval) \
do { \
	static struct sockaddr_in tmproute = {AF_INET}; \
	\
	if (! ((ifp)->if_flags & IFF_UP)) { \
		/* goes with PRC_IFDOWN in in.c */ \
		m_freem(mp); \
		(retval) = ENETUNREACH; \
	} else if ((rt)->rt_flags & (RTF_GATEWAY|RTF_HOST)) \
		(retval) = (*(ifp)->if_output)((ifp), (mp), &(rt)->rt_gateway); \
	else { \
		tmproute.sin_addr.s_addr = (mtod((mp), struct ip *))->ip_dst.s_addr; \
		(retval) = (*(ifp)->if_output)((ifp), (mp), (struct sockaddr *) &tmproute); \
	} \
} while (0)
6125202Skarels
/*
 * Function form of the IF_SEND macro, for callers that want a value
 * (ip_frag uses it once per fragment).
 *
 * ifp is the outgoing interface, mp the packet chain to transmit and
 * rt the routing entry that selects the link-level destination.
 * Returns whatever IF_SEND stored: ENETUNREACH when the interface is
 * not up (the chain has then been freed), otherwise the value returned
 * by the interface's if_output routine.
 */
if_send(ifp, mp, rt)
register struct ifnet *ifp;
register struct mbuf *mp;
register struct rtentry *rt;
{
	int retval;

	IF_SEND(ifp, mp, rt, retval);
	return (retval);
}
7225202Skarels
7325202Skarels
7425202Skarels /*
7525202Skarels * Find a route to this destination. Given the source and destination
7625202Skarels * addresses, it returns a local net address
7725202Skarels * to send to (either the address of the destination itself or a gateway).
7825202Skarels * Taken mostly from rtalloc; expanded to route according to
7925202Skarels * both ends of the connection.
8025202Skarels */
8125202Skarels
8225202Skarels
ip_route(src,dst)8325202Skarels struct rtentry *ip_route(src, dst)
8425202Skarels struct in_addr *src;
8525202Skarels struct in_addr *dst;
8625202Skarels {
8725202Skarels register struct rtentry *rt;
8825202Skarels register struct mbuf *m;
8925202Skarels register unsigned hash;
9025202Skarels net_t snet, dnet;
9125202Skarels int doinghost;
9225202Skarels struct rtentry *rtmin;
9325202Skarels struct mbuf **table;
9425202Skarels static struct in_addr wildcard;
9525202Skarels
9625202Skarels /* get network parts of src and dest addresses */
9725202Skarels
9825202Skarels snet = iptonet(*src);
9925202Skarels dnet = iptonet(*dst);
10025202Skarels
10125202Skarels rtmin = NULL;
10225202Skarels hash = HOSTHASH(dst->s_addr);
10325202Skarels table = rthost;
10425202Skarels doinghost = TRUE;
10525202Skarels again :
10625202Skarels for (m = table[hash % RTHASHSIZ]; m; m = m->m_next)
10725202Skarels {
10825202Skarels rt = mtod(m, struct rtentry *);
10925202Skarels if (rt->rt_hash != hash)
11025202Skarels continue;
11125202Skarels if (! (rt->rt_flags & RTF_UP))
11225202Skarels continue;
11325202Skarels if (! (rt->rt_ifp->if_flags & IFF_UP))
11425202Skarels continue;
11525202Skarels if (rt->rt_dst.sa_family != AF_INET)
11625202Skarels continue;
11725202Skarels
11825202Skarels /* packets go out an interface with our local IP address */
11925202Skarels if (iptonet(((struct sockaddr_in *)&(rt->rt_gateway))->sin_addr) != snet)
12025202Skarels continue;
12125202Skarels
12225202Skarels /* does this route get us there? */
12325202Skarels if (doinghost)
12425202Skarels {
12525202Skarels if (((struct sockaddr_in *)&(rt->rt_dst))->sin_addr.s_addr !=
12625202Skarels dst->s_addr)
12725202Skarels continue;
12825202Skarels }
12925202Skarels else
13025202Skarels {
13125202Skarels /*
13225202Skarels * iptonet == 0 => smart gateway (route to anywhere)
13325202Skarels * iptonet != 0 => gateway to another net (route to net)
13425202Skarels */
13525202Skarels if (iptonet(((struct sockaddr_in *)&(rt->rt_dst))->sin_addr) != dnet)
13625202Skarels continue;
13725202Skarels }
13825202Skarels
13925202Skarels /* and try to share load across gateways */
14025202Skarels if (rtmin == NULL)
14125202Skarels rtmin = rt;
14225202Skarels else if (rt->rt_use < rtmin->rt_use)
14325202Skarels rtmin = rt;
14425202Skarels }
14525202Skarels
14625202Skarels if (rtmin == NULL)
14725202Skarels {
14825202Skarels if (doinghost)
14925202Skarels {
15025202Skarels doinghost = FALSE;
15125202Skarels hash = NETHASH(*dst), table = rtnet;
15225202Skarels goto again;
15325202Skarels }
15425202Skarels /*
15525202Skarels * Check for wildcard gateway, by convention network 0.
15625202Skarels */
15725202Skarels if (dst != &wildcard)
15825202Skarels {
15925202Skarels hash = 0;
16025202Skarels dst = &wildcard;
16125202Skarels dnet = 0;
16225202Skarels goto again;
16325202Skarels }
16425202Skarels rtstat.rts_unreach++;
16525202Skarels return(NULL);
16625202Skarels }
16725202Skarels
16825202Skarels rtmin->rt_refcnt++;
16925202Skarels if (dst == &wildcard)
17025202Skarels rtstat.rts_wildcard++;
17125202Skarels return(rtmin);
17225202Skarels }
17325202Skarels
17425202Skarels
17525202Skarels /*
17625202Skarels * Ip_send is called from the higher protocol layer (TCP/RDP/UDP) and is passed
17725202Skarels * an mbuf chain containing a packet to send to the local network. The first
17825202Skarels * mbuf contains the protocol header and an IP header which is partially
17925202Skarels * filled in. After determining a route (outgoing interface + first hop) for
18025202Skarels * the packet, it is fragmented (if necessary) and sent to the local net
18125202Skarels * through the local net send routine.
18225202Skarels *
18325202Skarels * For non-raw output, caller should have stuffed:
18425202Skarels * ip protocol type, type of service, source addr, destin addr
18525202Skarels *
18625202Skarels * ip_tos is left to caller so that people using raw sockets can do whatever
18725202Skarels * they please. (They don't have an inpcb in which to store such info.)
18825202Skarels *
18925202Skarels * The asis argument is TRUE for raw output and the gateway (packet forwarding)
19025202Skarels * code. It indicates that the IP header is fully constructed.
19125202Skarels *
19225202Skarels * Errors at the IP layer and below occur synchronously, and can be reported
19325202Skarels * back via subroutine return values. Higher level protocols should remember
19425202Skarels * that if they do things asynchronous to a system call (ie., packet
19525202Skarels * retransmission) that they should post error back to user via advise_user()
19625202Skarels * so that user gets error next time he rendezvous with the kernel.
19725202Skarels */
19825202Skarels ip_send(inp, mp, len, asis)
19925202Skarels struct inpcb *inp;
20025202Skarels register struct mbuf *mp;
20125202Skarels register int len;
20225202Skarels int asis;
20325202Skarels {
20425202Skarels register struct ip *p;
20525202Skarels register struct ifnet *ifp;
20625202Skarels register struct rtentry *rt;
20725202Skarels register int hlen;
20825202Skarels int free_route = FALSE;
20925202Skarels int retval;
21025202Skarels
21125202Skarels p = mtod(mp, struct ip *); /* -> ip header */
21225202Skarels /*
21325202Skarels * Find route for datagram if one has not been assigned.
21425202Skarels */
21525202Skarels if ((rt = inp->inp_route.ro_rt) == NULL)
21625202Skarels {
21725202Skarels if ((rt = ip_route(&p->ip_src, &p->ip_dst)) == NULL)
21825202Skarels {
21925202Skarels if (asis || (p->ip_src.s_addr == INADDR_ANY))
22025202Skarels {
22125202Skarels /*
22225202Skarels * asis: forwarding a packet not sourced by us
22325202Skarels * eg., by raw interface and user level repeater process
22425202Skarels * INADDR_ANY: sending icmp packet for which
22525202Skarels * we're trying to avoid routing twice.
22625202Skarels */
22725202Skarels struct route tmproute;
22825202Skarels struct sockaddr_in *sin;
22925202Skarels
23025202Skarels bzero ((caddr_t) &tmproute, sizeof(tmproute));
23125202Skarels sin = (struct sockaddr_in *) &tmproute.ro_dst;
23225202Skarels sin->sin_family = AF_INET;
23325202Skarels sin->sin_addr.s_addr = p->ip_dst.s_addr;
23425202Skarels rtalloc (&tmproute);
23525202Skarels rt = tmproute.ro_rt;
23625202Skarels
23725202Skarels if (rt && (p->ip_src.s_addr == INADDR_ANY))
23825202Skarels p->ip_src = IA_INADDR(in_iafromif(rt->rt_ifp));
23925202Skarels }
24025202Skarels
24125202Skarels if (rt == NULL)
24225202Skarels {
24325202Skarels m_freem(mp);
24425202Skarels return(ENETUNREACH);
24525202Skarels }
24625202Skarels }
24725202Skarels free_route = TRUE;
24825202Skarels }
24925202Skarels ifp = rt->rt_ifp;
25025202Skarels
25125202Skarels /*
25225202Skarels * Copy ip source route to header. Know asis must be FALSE, if do.
25325202Skarels */
25425202Skarels if (inp->inp_optlen > 0)
25525202Skarels {
25625202Skarels char *q;
25725202Skarels
25825202Skarels if (mp->m_off - inp->inp_optlen >= MMINOFF)
25925202Skarels {
26025202Skarels struct in_addr *ipa;
26125202Skarels
26225202Skarels mp->m_off -= inp->inp_optlen;
26325202Skarels mp->m_len += inp->inp_optlen;
26425202Skarels q = (char *) p;
26525202Skarels p = (struct ip *) (q - inp->inp_optlen);
26625202Skarels bcopy(q, (caddr_t)p, sizeof(struct ip));
26725202Skarels bcopy(inp->inp_options, (caddr_t)(p+1), (unsigned)inp->inp_optlen);
26825202Skarels /*
26925202Skarels * And replate eventual destination with first hop.
27025202Skarels * Eventual destination is in source route just
27125202Skarels * copied in.
27225202Skarels */
27325202Skarels ipa = (struct in_addr *) (&inp->inp_options[0]);
27425202Skarels p->ip_dst = ipa[inp->inp_optlen/sizeof(struct in_addr)];
27525202Skarels }
27625202Skarels else
277*25207Skarels log(LOG_INFO, "ip_send: optlen %d inpcb 0x%x\n",
27825202Skarels (int)inp->inp_optlen, inp);
27925202Skarels }
28025202Skarels
28125202Skarels /*
28225202Skarels * fill in ip header fields
28325202Skarels */
28425202Skarels if (asis)
28525202Skarels {
28625202Skarels /*
28725202Skarels * RAW OUTPUT. Must get len, hlen, off from packet header.
28825202Skarels * Byte swap is ugly (since we must swap back below), but
28925202Skarels * necessary in case we must fragment.
29025202Skarels */
29125202Skarels hlen = p->ip_hl << IP_HLSHIFT;
29225202Skarels len = ntohs(p->ip_len);
29325202Skarels p->ip_off = ntohs(p->ip_off);
29425202Skarels }
29525202Skarels else
29625202Skarels {
29725202Skarels static u_short next_ip_id; /* some day RDP may want to force for rxmit */
29825202Skarels
29925202Skarels hlen = sizeof(struct ip) + inp->inp_optlen;
30025202Skarels len += hlen;
30125202Skarels p->ip_v = IPVERSION;
30225202Skarels p->ip_hl = hlen >> IP_HLSHIFT;
30325202Skarels p->ip_off = 0;
30425202Skarels p->ip_ttl = MAXTTL; /* ### should come from route */
30525202Skarels p->ip_id = htons(next_ip_id++);
30625202Skarels }
30725202Skarels
30825202Skarels /*
30925202Skarels * let ip_frag do the send if needed, otherwise do it directly.
31025202Skarels */
31125202Skarels
31225202Skarels /* for testing IP reassembly code */
31325202Skarels #ifdef FORCE_FRAG
31425202Skarels #define MTU(ifp) (((ifp)->if_mtu >> FORCE_FRAG) & (~3))
31525202Skarels #else
31625202Skarels #define MTU(ifp) (ifp)->if_mtu
31725202Skarels #endif
31825202Skarels
31925202Skarels if (len > MTU(ifp))
32025202Skarels {
32125202Skarels p->ip_len = len;
32225202Skarels retval = ip_frag(p, ifp, rt, hlen);
32325202Skarels }
32425202Skarels else
32525202Skarels {
32625202Skarels /*
32725202Skarels * complete header, byte swap, and send to local net
32825202Skarels */
32925202Skarels p->ip_len = htons((u_short)len);
33025202Skarels p->ip_off = htons(p->ip_off);
33125202Skarels /*
33225202Skarels * No reason not to have kernel checksum, even for raw packets.
33325202Skarels */
33425202Skarels p->ip_sum = 0;
33525202Skarels p->ip_sum = in_cksum(dtom(p), hlen);
33625202Skarels IF_SEND (ifp, mp, rt, retval);
33725202Skarels }
33825202Skarels
33925202Skarels rt->rt_use ++; /* Yet another IP packet sent away */
34025202Skarels
34125202Skarels if (free_route)
34225202Skarels {
34325202Skarels struct socket *so;
34425202Skarels
34525202Skarels if ((so = inp->inp_socket) &&
34625202Skarels (so->so_proto->pr_flags & PR_CONNREQUIRED))
34725202Skarels /*
34825202Skarels * Found a new route after old one pinged out.
34925202Skarels */
35025202Skarels inp->inp_route.ro_rt = rt;
35125202Skarels else
35225202Skarels rtfree(rt);
35325202Skarels }
35425202Skarels
35525202Skarels return(retval);
35625202Skarels }
35725202Skarels
35825202Skarels /*
35925202Skarels * Ip_frag is called with a packet with a completed ip header
36025202Skarels * (except for checksum). It fragments the packet, inserts the
36125202Skarels * IP checksum, and calls the appropriate local net output routine
36225202Skarels * to send it to the net.
36325202Skarels *
36425202Skarels * Previously, when there was only one kind of mbuf, it tried to
36525202Skarels * reduce space requirements by recycling the chain to be fragmented.
36625202Skarels * Preserving this approach is overly complicated, and should mbufs
36725202Skarels * change again, cause problems. Therefore, have switched to copying
36825202Skarels * the chain to be fragmented.
36925202Skarels */
ip_frag(p,ifp,rt,hlen)37025202Skarels ip_frag(p, ifp, rt, hlen)
37125202Skarels register struct ip *p;
37225202Skarels struct ifnet *ifp;
37325202Skarels struct rtentry *rt;
37425202Skarels register int hlen;
37525202Skarels {
37625202Skarels register struct mbuf *m; /* original chunk */
37725202Skarels register struct mbuf *mhdr; /* fragment */
37825202Skarels register struct ip *fip; /* the fragment IP header */
37925202Skarels int off; /* offset into entire IP datagram */
38025202Skarels int here; /* offset into this chunk of it */
38125202Skarels register int len; /* length of data in this chunk */
38225202Skarels int flags; /* of this chunk to fragment */
38325202Skarels int max; /* max data length in a fragment */
38425202Skarels int fdlen; /* actual fragment data length */
38525202Skarels int error;
38625202Skarels
38725202Skarels m = dtom(p);
38825202Skarels
38925202Skarels if (p->ip_off & ip_df)
39025202Skarels { /* can't fragment */
39125202Skarels m_freem(m);
39225202Skarels return(EMSGSIZE);
39325202Skarels }
39425202Skarels max = MTU(ifp) - hlen; /* max data length in frag */
39525202Skarels len = p->ip_len - hlen; /* data length */
39625202Skarels
39725202Skarels /*
39825202Skarels * this only needs to be this complicated if we are handed
39925202Skarels * an already-fragmented packet
40025202Skarels */
40125202Skarels flags = p->ip_off&(ip_mf|ip_df); /* save old flags */
40225202Skarels p->ip_off &= ~flags; /* take them out of ip_off */
40325202Skarels off = p->ip_off << IP_OFFSHIFT; /* fragment offset */
40425202Skarels here = hlen;
40525202Skarels error = 0;
40625202Skarels
40725202Skarels while (len > 0)
40825202Skarels {
40925202Skarels /*
41025202Skarels * Allocate mbuf for fragment IP header
41125202Skarels */
41225202Skarels mhdr = m_get(M_DONTWAIT, MT_HEADER);
41325202Skarels if (mhdr == NULL)
41425202Skarels {
41525202Skarels error = ENOBUFS;
41625202Skarels break;
41725202Skarels }
41825202Skarels /*
41925202Skarels * get copy of data for fragment
42025202Skarels */
42125202Skarels if (len < max)
42225202Skarels fdlen = len;
42325202Skarels else
42425202Skarels fdlen = max & (~7); /* 7 == 2^IP_OFFSHIFT -1 */
42525202Skarels mhdr->m_next = m_copy(m, here, fdlen);
42625202Skarels if (mhdr->m_next == NULL)
42725202Skarels {
42825202Skarels m_free(mhdr);
42925202Skarels error = ENOBUFS;
43025202Skarels break;
43125202Skarels }
43225202Skarels /*
43325202Skarels * build the header for this fragment and ship it off.
43425202Skarels */
43525202Skarels mhdr->m_len = hlen;
43625202Skarels mhdr->m_off = MMAXOFF - hlen;
43725202Skarels fip = mtod(mhdr, struct ip *);
43825202Skarels bcopy((caddr_t)p, (caddr_t)fip, (unsigned)hlen);
43925202Skarels fip->ip_off = off >> IP_OFFSHIFT;
44025202Skarels if (fdlen >= len)
44125202Skarels /* it's the last fragment */
44225202Skarels fip->ip_off |= flags;
44325202Skarels else
44425202Skarels fip->ip_off |= ip_mf;
44525202Skarels fip->ip_off = htons((u_short)fip->ip_off);
44625202Skarels fip->ip_len = htons((u_short)fdlen + hlen);
44725202Skarels fip->ip_sum = 0;
44825202Skarels fip->ip_sum = in_cksum(mhdr, hlen);
44925202Skarels if (error = if_send (ifp, mhdr, rt))
45025202Skarels break;
45125202Skarels
45225202Skarels /*
45325202Skarels * and get ready for next pass through the loop
45425202Skarels */
45525202Skarels len -= fdlen;
45625202Skarels off += fdlen;
45725202Skarels here += fdlen;
45825202Skarels }
45925202Skarels
46025202Skarels m_freem(m);
46125202Skarels return (error);
46225202Skarels }
46325202Skarels
46425202Skarels /*
46525202Skarels * Current connection should use a new path.
46625202Skarels */
ip_reroute(inp)46725202Skarels struct rtentry *ip_reroute(inp)
46825202Skarels register struct inpcb *inp;
46925202Skarels {
47025202Skarels register struct route *ro = &inp->inp_route;
47125202Skarels
47225202Skarels rtfree(ro->ro_rt);
47325202Skarels return(ro->ro_rt = ip_route(&inp->inp_laddr, &inp->inp_faddr));
47425202Skarels }
47525202Skarels
47625202Skarels /*
47725202Skarels * A gateway has gone down. Change route used by all connections currently
47825202Skarels * using it.
47925202Skarels */
ip_gdown(addr)48025202Skarels ip_gdown(addr)
48125202Skarels u_long addr;
48225202Skarels {
48325202Skarels register struct protosw *psw;
48425202Skarels
48525202Skarels for(psw=inetdomain.dom_protosw; psw < inetdomain.dom_protoswNPROTOSW; psw++)
48625202Skarels if (psw->pr_type != SOCK_RAW)
48725202Skarels if (psw->pr_ctlinput)
48825202Skarels (*(psw->pr_ctlinput)) (PRC_GWDOWN, addr);
48925202Skarels }
49025202Skarels
49125202Skarels /*
49225202Skarels * Called from protocol ctlinput routine. This way, IP/ICMP don't need to know
49325202Skarels * about protocol's head of inpcbs... for all the protocols.
49425202Skarels */
in_gdown(head,addr)49525202Skarels in_gdown (head, addr)
49625202Skarels register struct inpcb *head;
49725202Skarels u_long addr;
49825202Skarels {
49925202Skarels register struct inpcb *inp;
50025202Skarels register struct rtentry *rt;
50125202Skarels
50225202Skarels if (head == NULL)
50325202Skarels return;
50425202Skarels
50525202Skarels for(inp = head->inp_next; inp != head; inp = inp->inp_next)
50625202Skarels {
50725202Skarels if (rt = inp->inp_route.ro_rt)
50825202Skarels {
50925202Skarels if (rt->rt_flags & RTF_GATEWAY)
51025202Skarels {
51125202Skarels if (((struct sockaddr_in *) &rt->rt_gateway)->sin_addr.s_addr == addr)
51225202Skarels {
51325202Skarels /*
51425202Skarels * Don't remove route permanently, since want to catch
51525202Skarels * the gateway when it reboots:
51625202Skarels * -- rtrequest (SIOCDELRT, rt) --
51725202Skarels *
51825202Skarels * make sure rtfree() not remove route mbuf
51925202Skarels * incrementing reference count here, and decrementing
52025202Skarels * when timeout on reinstatement goes off. Cannot call
52125202Skarels * rtfree with zero reference count when have not done
52225202Skarels * SIOCDELRT.
52325202Skarels */
52425202Skarels if (rt->rt_flags & RTF_UP)
52525202Skarels {
52625202Skarels rt->rt_flags &= ~RTF_UP;
52725202Skarels rt->rt_flags |= RTF_REINSTATE;
52825202Skarels rt->irt_gdown = RT_REINSTATE;
52925202Skarels rt->rt_refcnt ++;
53025202Skarels }
53125202Skarels
53225202Skarels if (!ip_reroute(inp))
53325202Skarels advise_user(inp->inp_socket, ENETUNREACH);
53425202Skarels
53525202Skarels }
53625202Skarels }
53725202Skarels }
53825202Skarels }
53925202Skarels }
540