10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance
70Sstevel@tonic-gate * with the License.
80Sstevel@tonic-gate *
90Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate * See the License for the specific language governing permissions
120Sstevel@tonic-gate * and limitations under the License.
130Sstevel@tonic-gate *
140Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate *
200Sstevel@tonic-gate * CDDL HEADER END
210Sstevel@tonic-gate */
220Sstevel@tonic-gate /*
23*789Sahrens * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
240Sstevel@tonic-gate * Use is subject to license terms.
250Sstevel@tonic-gate */
260Sstevel@tonic-gate
270Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
280Sstevel@tonic-gate
290Sstevel@tonic-gate #include <dhcp_impl.h>
300Sstevel@tonic-gate #include <sys/types.h>
310Sstevel@tonic-gate #include <socket_impl.h>
320Sstevel@tonic-gate #include <socket_inet.h>
330Sstevel@tonic-gate #include <sys/time.h>
340Sstevel@tonic-gate #include <sys/socket.h>
350Sstevel@tonic-gate #include <net/if.h>
360Sstevel@tonic-gate #include <net/if_arp.h>
370Sstevel@tonic-gate #include <netinet/in_systm.h>
380Sstevel@tonic-gate #include <netinet/in.h>
390Sstevel@tonic-gate #include <netinet/ip.h>
400Sstevel@tonic-gate #include <netinet/if_ether.h>
410Sstevel@tonic-gate #include <sys/promif.h>
420Sstevel@tonic-gate #include <sys/prom_plat.h>
430Sstevel@tonic-gate #include <sys/salib.h>
440Sstevel@tonic-gate #include <sys/bootdebug.h>
450Sstevel@tonic-gate #include <sys/ib/clients/ibd/ibd.h>
460Sstevel@tonic-gate
470Sstevel@tonic-gate #include "ipv4.h"
480Sstevel@tonic-gate #include "dhcpv4.h"
490Sstevel@tonic-gate #include "ipv4_impl.h"
500Sstevel@tonic-gate #include "mac.h"
510Sstevel@tonic-gate #include "mac_impl.h"
520Sstevel@tonic-gate #include "ibd_inet.h"
530Sstevel@tonic-gate
540Sstevel@tonic-gate struct ibd_arp {
550Sstevel@tonic-gate struct arphdr ea_hdr; /* fixed-size header */
560Sstevel@tonic-gate ipoib_mac_t arp_sha; /* sender hardware address */
570Sstevel@tonic-gate uchar_t arp_spa[4]; /* sender protocol address */
580Sstevel@tonic-gate ipoib_mac_t arp_tha; /* target hardware address */
590Sstevel@tonic-gate uchar_t arp_tpa[4]; /* target protocol address */
600Sstevel@tonic-gate };
610Sstevel@tonic-gate
620Sstevel@tonic-gate extern int errno;
630Sstevel@tonic-gate ipoib_mac_t ibdbroadcastaddr;
640Sstevel@tonic-gate
/*
 * Assumptions about OBP behavior (refer to FWARC 2002/702, 2003/251):
 * 1. prom_write() accepts the 20 byte destination address as the
 *    first component in the send buffer. The buffer pointer points
 *    to the start of this 20 byte address. The length parameter is
 *    the IPoIB datagram size with the 20 bytes of destination
 *    address.
 * 2. OBP will not provide max-frame-size, since OBP can only
 *    determine that by querying the IBA mcg, and thus the property
 *    has to be /chosen:ipib-frame-size. This will refer to the IPoIB
 *    link MTU as per section 4.0 of the ietf i/d, i.e., the 4 byte
 *    IPoIB header plus the IP payload mtu, plus the 20 bytes of
 *    addressing information.
 * 3. OBP will not provide the mac-address property for IPoIB since
 *    there are built in assumptions about a 6 byte address with that.
 *    Instead, /chosen:ipib-address will provide the local address.
 * 4. prom_read() returns a 20 byte 0'ed filler followed by the 4 byte
 *    IPoIB header followed by the IP payload. The return value is -2,
 *    -1, 0, or the length of the received IPoIB datagram along with
 *    the 20 bytes MBZ. The buffer pointer points to the start of
 *    the 20 MBZ bytes. The length parameter reflects the max data
 *    size that should be copied into the buffer including the 20
 *    MBZ bytes.
 * 5. OBP will not provide chosen-network-type, only
 *    network-interface-type = ipib. On an Infiniband device, this
 *    however does not guarantee that it is a network device.
 * 6. OBP will provide the DHCP client id in /chosen:client-id.
 * 7. /chosen:ipib-broadcast will provide the broadcast address.
 * 8. OBP will validate that RARP is not being used before
 *    allowing boot to proceed to inetboot.
 */
960Sstevel@tonic-gate
970Sstevel@tonic-gate struct arp_packet {
980Sstevel@tonic-gate ipoib_ptxhdr_t arp_eh;
990Sstevel@tonic-gate struct ibd_arp arp_ea;
1000Sstevel@tonic-gate };
1010Sstevel@tonic-gate
/*
 * Debug printf: produces output only when RB_DEBUG is set in boothowto.
 * The macro expands to a bare "if", so never place it directly before an
 * "else" without enclosing braces.
 */
#define	dprintf	if (boothowto & RB_DEBUG) printf
1030Sstevel@tonic-gate
1040Sstevel@tonic-gate static char *
ibd_print(ipoib_mac_t * ea)1050Sstevel@tonic-gate ibd_print(ipoib_mac_t *ea)
1060Sstevel@tonic-gate {
1070Sstevel@tonic-gate unsigned char *macaddr = (unsigned char *)ea;
1080Sstevel@tonic-gate static char pbuf[(3 * IPOIB_ADDRL) + 1];
1090Sstevel@tonic-gate int i;
1100Sstevel@tonic-gate char *ptr = pbuf;
1110Sstevel@tonic-gate
1120Sstevel@tonic-gate ptr = pbuf + sprintf(pbuf, "%x", *macaddr++);
1130Sstevel@tonic-gate for (i = 0; i < (IPOIB_ADDRL - 1); i++)
1140Sstevel@tonic-gate ptr += sprintf(ptr, ":%x", *macaddr++);
1150Sstevel@tonic-gate return (pbuf);
1160Sstevel@tonic-gate }
1170Sstevel@tonic-gate
1180Sstevel@tonic-gate
1190Sstevel@tonic-gate /*
1200Sstevel@tonic-gate * Common ARP code. Broadcast the packet and wait for the right response.
1210Sstevel@tonic-gate *
1220Sstevel@tonic-gate * If arp is called for, caller expects a hardware address in the
1230Sstevel@tonic-gate * source hardware address (sha) field of the "out" argument.
1240Sstevel@tonic-gate *
1250Sstevel@tonic-gate * IPoIB does not support RARP (see ibd_revarp()).
1260Sstevel@tonic-gate *
1270Sstevel@tonic-gate * Returns TRUE if transaction succeeded, FALSE otherwise.
1280Sstevel@tonic-gate *
1290Sstevel@tonic-gate * The timeout argument is the number of milliseconds to wait for a
1300Sstevel@tonic-gate * response. An infinite timeout can be specified as 0xffffffff.
1310Sstevel@tonic-gate */
1320Sstevel@tonic-gate static int
ibd_comarp(struct arp_packet * out,uint32_t timeout)1330Sstevel@tonic-gate ibd_comarp(struct arp_packet *out, uint32_t timeout)
1340Sstevel@tonic-gate {
1350Sstevel@tonic-gate struct arp_packet *in = (struct arp_packet *)mac_state.mac_buf;
1360Sstevel@tonic-gate int count, time, feedback, len, delay = 2;
1370Sstevel@tonic-gate char *ind = "-\\|/";
1380Sstevel@tonic-gate struct in_addr tmp_ia;
1390Sstevel@tonic-gate uint32_t wait_time;
1400Sstevel@tonic-gate
1410Sstevel@tonic-gate bcopy((caddr_t)&ibdbroadcastaddr, (caddr_t)&out->arp_eh.ipoib_dest,
1420Sstevel@tonic-gate IPOIB_ADDRL);
1430Sstevel@tonic-gate
1440Sstevel@tonic-gate out->arp_ea.arp_hrd = htons(ARPHRD_IB);
1450Sstevel@tonic-gate out->arp_ea.arp_pro = htons(ETHERTYPE_IP);
1460Sstevel@tonic-gate out->arp_ea.arp_hln = IPOIB_ADDRL;
1470Sstevel@tonic-gate out->arp_ea.arp_pln = sizeof (struct in_addr);
1480Sstevel@tonic-gate bcopy(mac_state.mac_addr_buf, (caddr_t)&out->arp_ea.arp_sha,
1490Sstevel@tonic-gate IPOIB_ADDRL);
1500Sstevel@tonic-gate ipv4_getipaddr(&tmp_ia);
1510Sstevel@tonic-gate tmp_ia.s_addr = htonl(tmp_ia.s_addr);
1520Sstevel@tonic-gate bcopy((caddr_t)&tmp_ia, (caddr_t)out->arp_ea.arp_spa,
1530Sstevel@tonic-gate sizeof (struct in_addr));
1540Sstevel@tonic-gate feedback = 0;
1550Sstevel@tonic-gate
1560Sstevel@tonic-gate wait_time = prom_gettime() + timeout;
1570Sstevel@tonic-gate for (count = 0; timeout == ~0U || prom_gettime() < wait_time; count++) {
1580Sstevel@tonic-gate if (count == IBD_WAITCNT) {
1590Sstevel@tonic-gate /*
1600Sstevel@tonic-gate * Since IPoIB does not support RARP (see ibd_revarp),
1610Sstevel@tonic-gate * we know that out->arp_ea.arp_op == ARPOP_REQUEST.
1620Sstevel@tonic-gate */
1630Sstevel@tonic-gate bcopy((caddr_t)out->arp_ea.arp_tpa,
1640Sstevel@tonic-gate (caddr_t)&tmp_ia, sizeof (struct in_addr));
1650Sstevel@tonic-gate printf("\nRequesting MAC address for: %s\n",
1660Sstevel@tonic-gate inet_ntoa(tmp_ia));
1670Sstevel@tonic-gate }
1680Sstevel@tonic-gate
1690Sstevel@tonic-gate (void) prom_write(mac_state.mac_dev, (caddr_t)out,
1700Sstevel@tonic-gate sizeof (*out), 0, NETWORK);
1710Sstevel@tonic-gate
1720Sstevel@tonic-gate if (count >= IBD_WAITCNT)
1730Sstevel@tonic-gate printf("%c\b", ind[feedback++ % 4]); /* activity */
1740Sstevel@tonic-gate
1750Sstevel@tonic-gate time = prom_gettime() + (delay * 1000); /* broadcast delay */
1760Sstevel@tonic-gate while (prom_gettime() <= time) {
1770Sstevel@tonic-gate len = prom_read(mac_state.mac_dev, mac_state.mac_buf,
1780Sstevel@tonic-gate mac_state.mac_mtu, 0, NETWORK);
1790Sstevel@tonic-gate if (len < sizeof (struct arp_packet))
1800Sstevel@tonic-gate continue;
1810Sstevel@tonic-gate if (in->arp_ea.arp_pro != ntohs(ETHERTYPE_IP))
1820Sstevel@tonic-gate continue;
1830Sstevel@tonic-gate /*
1840Sstevel@tonic-gate * Since IPoIB does not support RARP (see ibd_revarp),
1850Sstevel@tonic-gate * we know that out->arp_ea.arp_op == ARPOP_REQUEST.
1860Sstevel@tonic-gate */
1870Sstevel@tonic-gate if (in->arp_eh.ipoib_rhdr.ipoib_type !=
1880Sstevel@tonic-gate ntohs(ETHERTYPE_ARP))
1890Sstevel@tonic-gate continue;
1900Sstevel@tonic-gate if (in->arp_ea.arp_op != ntohs(ARPOP_REPLY))
1910Sstevel@tonic-gate continue;
1920Sstevel@tonic-gate if (bcmp((caddr_t)in->arp_ea.arp_spa,
1930Sstevel@tonic-gate (caddr_t)out->arp_ea.arp_tpa,
1940Sstevel@tonic-gate sizeof (struct in_addr)) != 0)
1950Sstevel@tonic-gate continue;
1960Sstevel@tonic-gate if (boothowto & RB_VERBOSE) {
1970Sstevel@tonic-gate bcopy((caddr_t)in->arp_ea.arp_spa,
1980Sstevel@tonic-gate (caddr_t)&tmp_ia,
1990Sstevel@tonic-gate sizeof (struct in_addr));
2000Sstevel@tonic-gate printf("Found %s @ %s\n",
2010Sstevel@tonic-gate inet_ntoa(tmp_ia),
2020Sstevel@tonic-gate ibd_print(&in->arp_ea.arp_sha));
2030Sstevel@tonic-gate }
2040Sstevel@tonic-gate /* copy hardware addr into "out" for caller */
2050Sstevel@tonic-gate bcopy((caddr_t)&in->arp_ea.arp_sha,
2060Sstevel@tonic-gate (caddr_t)&out->arp_ea.arp_sha, IPOIB_ADDRL);
2070Sstevel@tonic-gate return (TRUE);
2080Sstevel@tonic-gate }
2090Sstevel@tonic-gate
2100Sstevel@tonic-gate delay = delay * 2; /* Double the request delay */
2110Sstevel@tonic-gate if (delay > 64) /* maximum delay is 64 seconds */
2120Sstevel@tonic-gate delay = 64;
2130Sstevel@tonic-gate }
2140Sstevel@tonic-gate return (FALSE);
2150Sstevel@tonic-gate }
2160Sstevel@tonic-gate
2170Sstevel@tonic-gate /*
2180Sstevel@tonic-gate * ARP client side
2190Sstevel@tonic-gate * Broadcasts to determine MAC address given network order IP address.
2200Sstevel@tonic-gate * See RFC 826
2210Sstevel@tonic-gate *
2220Sstevel@tonic-gate * Returns TRUE if successful, FALSE otherwise.
2230Sstevel@tonic-gate */
2240Sstevel@tonic-gate static int
ibd_arp(struct in_addr * ip,void * hap,uint32_t timeout)2250Sstevel@tonic-gate ibd_arp(struct in_addr *ip, void *hap, uint32_t timeout)
2260Sstevel@tonic-gate {
2270Sstevel@tonic-gate ipoib_mac_t *ep = (ipoib_mac_t *)hap;
2280Sstevel@tonic-gate struct arp_packet out;
2290Sstevel@tonic-gate int result;
2300Sstevel@tonic-gate
2310Sstevel@tonic-gate if (!initialized)
2320Sstevel@tonic-gate prom_panic("IPoIB device is not initialized.");
2330Sstevel@tonic-gate
2340Sstevel@tonic-gate bzero((char *)&out, sizeof (struct arp_packet));
2350Sstevel@tonic-gate
2360Sstevel@tonic-gate out.arp_eh.ipoib_rhdr.ipoib_type = htons(ETHERTYPE_ARP);
2370Sstevel@tonic-gate out.arp_ea.arp_op = htons(ARPOP_REQUEST);
2380Sstevel@tonic-gate bcopy((caddr_t)&ibdbroadcastaddr, (caddr_t)&out.arp_ea.arp_tha,
2390Sstevel@tonic-gate IPOIB_ADDRL);
2400Sstevel@tonic-gate bcopy((caddr_t)ip, (caddr_t)out.arp_ea.arp_tpa,
2410Sstevel@tonic-gate sizeof (struct in_addr));
2420Sstevel@tonic-gate
2430Sstevel@tonic-gate result = ibd_comarp(&out, timeout);
2440Sstevel@tonic-gate
2450Sstevel@tonic-gate if (result && (ep != NULL)) {
2460Sstevel@tonic-gate bcopy((caddr_t)&out.arp_ea.arp_sha, (caddr_t)ep, IPOIB_ADDRL);
2470Sstevel@tonic-gate }
2480Sstevel@tonic-gate return (result);
2490Sstevel@tonic-gate }
2500Sstevel@tonic-gate
/*
 * Reverse ARP client side (RFC 903): would determine our Internet
 * address from our MAC address.
 *
 * RARP is not available on IPoIB, so reaching this entry point is
 * treated as fatal and the PROM panic routine is invoked.
 */
static void
ibd_revarp(void)
{
	prom_panic("IPoIB can not boot with RARP.");
}
2610Sstevel@tonic-gate
2620Sstevel@tonic-gate /* ARGSUSED */
2630Sstevel@tonic-gate static int
ibd_header_len(struct inetgram * igm)2640Sstevel@tonic-gate ibd_header_len(struct inetgram *igm)
2650Sstevel@tonic-gate {
2660Sstevel@tonic-gate /*
2670Sstevel@tonic-gate * We indicate to upper layers to leave enough space
2680Sstevel@tonic-gate * in output buffers for filling in the IPoIB header
2690Sstevel@tonic-gate * and the 20 byte destination address in ibd_output().
2700Sstevel@tonic-gate */
2710Sstevel@tonic-gate return (IPOIB_HDRSIZE + IPOIB_ADDRL);
2720Sstevel@tonic-gate }
2730Sstevel@tonic-gate
2740Sstevel@tonic-gate /*
2750Sstevel@tonic-gate * Handle a IP datagram addressed to our MAC address or to the link
2760Sstevel@tonic-gate * layer broadcast address. Also respond to ARP requests. Generates
2770Sstevel@tonic-gate * inetgrams as long as there's data and the mac level IP timeout timer
2780Sstevel@tonic-gate * hasn't expired. As soon as there is no data, we try for
2790Sstevel@tonic-gate * IBD_INPUT_ATTEMPTS for more, then exit the loop, even if there is time
2800Sstevel@tonic-gate * left, since we expect to have data waiting for us when we're called, we just
2810Sstevel@tonic-gate * don't know how much.
2820Sstevel@tonic-gate *
2830Sstevel@tonic-gate * We workaround slow proms (some proms have hard sleeps for as much as 3msec)
2840Sstevel@tonic-gate * even though there are is data waiting.
2850Sstevel@tonic-gate *
2860Sstevel@tonic-gate * Returns the total number of MEDIA_LVL frames placed on the socket.
2870Sstevel@tonic-gate * Caller is expected to free up the inetgram resources.
2880Sstevel@tonic-gate */
2890Sstevel@tonic-gate static int
ibd_input(int index)2900Sstevel@tonic-gate ibd_input(int index)
2910Sstevel@tonic-gate {
2920Sstevel@tonic-gate struct inetgram *inp;
2930Sstevel@tonic-gate ipoib_ptxhdr_t *eh;
2940Sstevel@tonic-gate int frames = 0; /* successful frames */
2950Sstevel@tonic-gate int attempts = 0; /* failed attempts after success */
2960Sstevel@tonic-gate int16_t len = 0, data_len;
2970Sstevel@tonic-gate uint32_t timeout, reltime;
2980Sstevel@tonic-gate uint32_t pre_pr, post_pr; /* prom_read interval */
2990Sstevel@tonic-gate
3000Sstevel@tonic-gate #ifdef DEBUG
3010Sstevel@tonic-gate int failures = 0; /* total failures */
3020Sstevel@tonic-gate int total_attempts = 0; /* total prom_read */
3030Sstevel@tonic-gate int no_data = 0; /* no data in prom */
3040Sstevel@tonic-gate int arps = 0; /* arp requests processed */
3050Sstevel@tonic-gate uint32_t tot_pr = 0; /* prom_read time */
3060Sstevel@tonic-gate uint32_t tot_pc = 0; /* inetgram creation time */
3070Sstevel@tonic-gate uint32_t pre_pc;
3080Sstevel@tonic-gate uint32_t now;
3090Sstevel@tonic-gate #endif /* DEBUG */
3100Sstevel@tonic-gate
3110Sstevel@tonic-gate if (!initialized)
3120Sstevel@tonic-gate prom_panic("IPoIB device is not initialized.");
3130Sstevel@tonic-gate
3140Sstevel@tonic-gate if ((reltime = sockets[index].in_timeout) == 0)
3150Sstevel@tonic-gate reltime = mac_state.mac_in_timeout;
3160Sstevel@tonic-gate timeout = prom_gettime() + reltime;
3170Sstevel@tonic-gate
3180Sstevel@tonic-gate do {
3190Sstevel@tonic-gate if (frames > IBD_MAX_FRAMES) {
3200Sstevel@tonic-gate /* someone is trying a denial of service attack */
3210Sstevel@tonic-gate break;
3220Sstevel@tonic-gate }
3230Sstevel@tonic-gate
3240Sstevel@tonic-gate /*
3250Sstevel@tonic-gate * The following is being paranoid about possible bugs
3260Sstevel@tonic-gate * where prom_read() returns a nonzero length, even when
3270Sstevel@tonic-gate * it's not read a packet; it zeroes out the header to
3280Sstevel@tonic-gate * compensate. Paranoia from calvin prom (V2) days.
3290Sstevel@tonic-gate */
3300Sstevel@tonic-gate bzero(mac_state.mac_buf, sizeof (ipoib_ptxhdr_t));
3310Sstevel@tonic-gate
3320Sstevel@tonic-gate /*
3330Sstevel@tonic-gate * Prom_read() will return 0 or -2 if no data is present. A
3340Sstevel@tonic-gate * return value of -1 means an error has occurred. We adjust
3350Sstevel@tonic-gate * the timeout by calling the time spent in prom_read() "free".
3360Sstevel@tonic-gate * prom_read() returns the number of bytes actually read, but
3370Sstevel@tonic-gate * will only copy "len" bytes into our buffer. Adjust in
3380Sstevel@tonic-gate * case the MTU is wrong.
3390Sstevel@tonic-gate */
3400Sstevel@tonic-gate pre_pr = prom_gettime();
3410Sstevel@tonic-gate len = prom_read(mac_state.mac_dev, mac_state.mac_buf,
3420Sstevel@tonic-gate mac_state.mac_mtu, 0, NETWORK);
3430Sstevel@tonic-gate post_pr = prom_gettime();
3440Sstevel@tonic-gate timeout += (post_pr - pre_pr);
3450Sstevel@tonic-gate #ifdef DEBUG
3460Sstevel@tonic-gate tot_pr += (post_pr - pre_pr);
3470Sstevel@tonic-gate total_attempts++;
3480Sstevel@tonic-gate #endif /* DEBUG */
3490Sstevel@tonic-gate
3500Sstevel@tonic-gate if (len > mac_state.mac_mtu) {
3510Sstevel@tonic-gate dprintf("ibd_input: adjusting MTU %d -> %d\n",
3520Sstevel@tonic-gate mac_state.mac_mtu, len);
3530Sstevel@tonic-gate bkmem_free(mac_state.mac_buf, mac_state.mac_mtu);
3540Sstevel@tonic-gate mac_state.mac_mtu = len;
3550Sstevel@tonic-gate mac_state.mac_buf = bkmem_alloc(mac_state.mac_mtu);
3560Sstevel@tonic-gate if (mac_state.mac_buf == NULL) {
3570Sstevel@tonic-gate prom_panic("ibd_input: Cannot reallocate "
3580Sstevel@tonic-gate "netbuf memory.");
3590Sstevel@tonic-gate }
3600Sstevel@tonic-gate len = 0; /* pretend there was no data */
3610Sstevel@tonic-gate }
3620Sstevel@tonic-gate
3630Sstevel@tonic-gate if (len == -1) {
3640Sstevel@tonic-gate #ifdef DEBUG
3650Sstevel@tonic-gate failures++;
3660Sstevel@tonic-gate #endif /* DEBUG */
3670Sstevel@tonic-gate break;
3680Sstevel@tonic-gate }
3690Sstevel@tonic-gate if (len == 0 || len == -2) {
3700Sstevel@tonic-gate if (frames != 0)
3710Sstevel@tonic-gate attempts++;
3720Sstevel@tonic-gate #ifdef DEBUG
3730Sstevel@tonic-gate no_data++;
3740Sstevel@tonic-gate #endif /* DEBUG */
3750Sstevel@tonic-gate continue;
3760Sstevel@tonic-gate }
3770Sstevel@tonic-gate
3780Sstevel@tonic-gate eh = (ipoib_ptxhdr_t *)mac_state.mac_buf;
3790Sstevel@tonic-gate if (eh->ipoib_rhdr.ipoib_type == ntohs(ETHERTYPE_IP) &&
3800Sstevel@tonic-gate len >= (sizeof (ipoib_ptxhdr_t) + sizeof (struct ip))) {
3810Sstevel@tonic-gate
3820Sstevel@tonic-gate int offset;
3830Sstevel@tonic-gate #ifdef DEBUG
3840Sstevel@tonic-gate pre_pc = prom_gettime();
3850Sstevel@tonic-gate #endif /* DEBUG */
3860Sstevel@tonic-gate
3870Sstevel@tonic-gate inp = (struct inetgram *)bkmem_zalloc(
3880Sstevel@tonic-gate sizeof (struct inetgram));
3890Sstevel@tonic-gate if (inp == NULL) {
3900Sstevel@tonic-gate errno = ENOMEM;
3910Sstevel@tonic-gate return (frames == 0 ? -1 : frames);
3920Sstevel@tonic-gate }
3930Sstevel@tonic-gate offset = sizeof (ipoib_ptxhdr_t);
3940Sstevel@tonic-gate data_len = len - offset;
3950Sstevel@tonic-gate inp->igm_mp = allocb(data_len, 0);
3960Sstevel@tonic-gate if (inp->igm_mp == NULL) {
3970Sstevel@tonic-gate errno = ENOMEM;
3980Sstevel@tonic-gate bkmem_free((caddr_t)inp,
3990Sstevel@tonic-gate sizeof (struct inetgram));
4000Sstevel@tonic-gate return (frames == 0 ? -1 : frames);
4010Sstevel@tonic-gate }
4020Sstevel@tonic-gate bcopy((caddr_t)(mac_state.mac_buf + offset),
4030Sstevel@tonic-gate inp->igm_mp->b_rptr, data_len);
4040Sstevel@tonic-gate inp->igm_mp->b_wptr += data_len;
4050Sstevel@tonic-gate inp->igm_level = NETWORK_LVL;
4060Sstevel@tonic-gate add_grams(&sockets[index].inq, inp);
4070Sstevel@tonic-gate frames++;
4080Sstevel@tonic-gate attempts = 0;
4090Sstevel@tonic-gate #ifdef DEBUG
4100Sstevel@tonic-gate tot_pc += prom_gettime() - pre_pc;
4110Sstevel@tonic-gate #endif /* DEBUG */
4120Sstevel@tonic-gate continue;
4130Sstevel@tonic-gate }
4140Sstevel@tonic-gate
4150Sstevel@tonic-gate if (eh->ipoib_rhdr.ipoib_type == ntohs(ETHERTYPE_ARP) &&
4160Sstevel@tonic-gate len >= sizeof (struct arp_packet)) {
4170Sstevel@tonic-gate
4180Sstevel@tonic-gate struct in_addr ip;
4190Sstevel@tonic-gate struct ibd_arp *ea;
4200Sstevel@tonic-gate
4210Sstevel@tonic-gate #ifdef DEBUG
4220Sstevel@tonic-gate printf("ibd_input: ARP message received\n");
4230Sstevel@tonic-gate arps++;
4240Sstevel@tonic-gate #endif /* DEBUG */
4250Sstevel@tonic-gate
4260Sstevel@tonic-gate ea = (struct ibd_arp *)(mac_state.mac_buf +
4270Sstevel@tonic-gate sizeof (ipoib_ptxhdr_t));
4280Sstevel@tonic-gate if (ea->arp_pro != ntohs(ETHERTYPE_IP))
4290Sstevel@tonic-gate continue;
4300Sstevel@tonic-gate
4310Sstevel@tonic-gate ipv4_getipaddr(&ip);
4320Sstevel@tonic-gate ip.s_addr = ntohl(ip.s_addr);
4330Sstevel@tonic-gate
4340Sstevel@tonic-gate if (ea->arp_op == ntohs(ARPOP_REQUEST) &&
4350Sstevel@tonic-gate ip.s_addr != INADDR_ANY &&
4360Sstevel@tonic-gate (bcmp((caddr_t)ea->arp_tpa, (caddr_t)&ip,
4370Sstevel@tonic-gate sizeof (struct in_addr)) == 0)) {
4380Sstevel@tonic-gate ea->arp_op = htons(ARPOP_REPLY);
4390Sstevel@tonic-gate bcopy((caddr_t)&ea->arp_sha,
4400Sstevel@tonic-gate (caddr_t)&eh->ipoib_dest, IPOIB_ADDRL);
4410Sstevel@tonic-gate bcopy((caddr_t)&ea->arp_sha,
4420Sstevel@tonic-gate (caddr_t)&ea->arp_tha, IPOIB_ADDRL);
4430Sstevel@tonic-gate bcopy((caddr_t)ea->arp_spa,
4440Sstevel@tonic-gate (caddr_t)ea->arp_tpa,
4450Sstevel@tonic-gate sizeof (struct in_addr));
4460Sstevel@tonic-gate bcopy(mac_state.mac_addr_buf,
4470Sstevel@tonic-gate (caddr_t)&ea->arp_sha,
4480Sstevel@tonic-gate mac_state.mac_addr_len);
4490Sstevel@tonic-gate bcopy((caddr_t)&ip, (caddr_t)ea->arp_spa,
4500Sstevel@tonic-gate sizeof (struct in_addr));
4510Sstevel@tonic-gate (void) prom_write(mac_state.mac_dev,
4520Sstevel@tonic-gate mac_state.mac_buf,
4530Sstevel@tonic-gate sizeof (struct arp_packet), 0, NETWORK);
4540Sstevel@tonic-gate /* don't charge for ARP replies */
4550Sstevel@tonic-gate timeout += reltime;
4560Sstevel@tonic-gate }
4570Sstevel@tonic-gate }
4580Sstevel@tonic-gate } while (attempts < IBD_INPUT_ATTEMPTS &&
4590Sstevel@tonic-gate #ifdef DEBUG
4600Sstevel@tonic-gate (now = prom_gettime()) < timeout);
4610Sstevel@tonic-gate #else
4620Sstevel@tonic-gate prom_gettime() < timeout);
4630Sstevel@tonic-gate #endif /* DEBUG */
4640Sstevel@tonic-gate
4650Sstevel@tonic-gate #ifdef DEBUG
4660Sstevel@tonic-gate printf("ibd_input(%d): T/S/N/A/F/P/M: %d/%d/%d/%d/%d/%d/%d "
4670Sstevel@tonic-gate "T/O: %d < %d = %s\n", index, total_attempts, frames, no_data,
4680Sstevel@tonic-gate arps, failures, tot_pr, tot_pc, now, timeout,
4690Sstevel@tonic-gate (now < timeout) ? "TRUE" : "FALSE");
4700Sstevel@tonic-gate #endif /* DEBUG */
4710Sstevel@tonic-gate return (frames);
4720Sstevel@tonic-gate }
4730Sstevel@tonic-gate
4740Sstevel@tonic-gate /*
4750Sstevel@tonic-gate * Send out an IPoIB datagram. We expect a IP frame appropriately fragmented
4760Sstevel@tonic-gate * at this level.
4770Sstevel@tonic-gate *
4780Sstevel@tonic-gate * Errno is set and -1 is returned if an error occurs. Number of bytes sent
4790Sstevel@tonic-gate * is returned on success.
4800Sstevel@tonic-gate */
4810Sstevel@tonic-gate /* ARGSUSED */
4820Sstevel@tonic-gate static int
ibd_output(int index,struct inetgram * ogp)4830Sstevel@tonic-gate ibd_output(int index, struct inetgram *ogp)
4840Sstevel@tonic-gate {
4850Sstevel@tonic-gate int header_len, result;
4860Sstevel@tonic-gate ipoib_ptxhdr_t eh;
4870Sstevel@tonic-gate struct ip *ip;
4880Sstevel@tonic-gate struct in_addr tmpip, ipdst;
4890Sstevel@tonic-gate int broadcast = FALSE;
4900Sstevel@tonic-gate int size;
4910Sstevel@tonic-gate mblk_t *mp;
4920Sstevel@tonic-gate
4930Sstevel@tonic-gate if (!initialized)
4940Sstevel@tonic-gate prom_panic("IPoIB device is not initialized.");
4950Sstevel@tonic-gate
4960Sstevel@tonic-gate if (ogp->igm_level != MEDIA_LVL) {
4970Sstevel@tonic-gate dprintf("ibd_output: frame type wrong: socket: %d\n",
4980Sstevel@tonic-gate index * SOCKETTYPE);
4990Sstevel@tonic-gate errno = EINVAL;
5000Sstevel@tonic-gate return (-1);
5010Sstevel@tonic-gate }
5020Sstevel@tonic-gate
5030Sstevel@tonic-gate header_len = IPOIB_HDRSIZE + IPOIB_ADDRL;
5040Sstevel@tonic-gate mp = ogp->igm_mp;
5050Sstevel@tonic-gate size = mp->b_wptr - mp->b_rptr;
5060Sstevel@tonic-gate if (size > (mac_state.mac_mtu - IPOIB_ADDRL)) {
5070Sstevel@tonic-gate dprintf("ibd_output: frame size too big: %d\n", size);
5080Sstevel@tonic-gate errno = E2BIG;
5090Sstevel@tonic-gate return (-1);
5100Sstevel@tonic-gate }
5110Sstevel@tonic-gate
5120Sstevel@tonic-gate size += header_len;
5130Sstevel@tonic-gate ip = (struct ip *)(mp->b_rptr);
5140Sstevel@tonic-gate
5150Sstevel@tonic-gate eh.ipoib_rhdr.ipoib_type = htons(ETHERTYPE_IP);
5160Sstevel@tonic-gate eh.ipoib_rhdr.ipoib_mbz = 0;
5170Sstevel@tonic-gate bcopy((caddr_t)&ip->ip_dst, (caddr_t)&ipdst, sizeof (ipdst));
5180Sstevel@tonic-gate
5190Sstevel@tonic-gate if (ipdst.s_addr == htonl(INADDR_BROADCAST))
5200Sstevel@tonic-gate broadcast = TRUE; /* limited broadcast */
5210Sstevel@tonic-gate
5220Sstevel@tonic-gate if (!broadcast) {
5230Sstevel@tonic-gate struct in_addr mask;
5240Sstevel@tonic-gate
5250Sstevel@tonic-gate ipv4_getnetmask(&mask);
5260Sstevel@tonic-gate mask.s_addr = htonl(mask.s_addr);
5270Sstevel@tonic-gate if (mask.s_addr != htonl(INADDR_BROADCAST) &&
5280Sstevel@tonic-gate (ipdst.s_addr & ~mask.s_addr) == 0) {
5290Sstevel@tonic-gate broadcast = TRUE; /* directed broadcast */
5300Sstevel@tonic-gate } else {
5310Sstevel@tonic-gate if (ogp->igm_router.s_addr != htonl(INADDR_ANY))
5320Sstevel@tonic-gate tmpip.s_addr = ogp->igm_router.s_addr;
5330Sstevel@tonic-gate else
5340Sstevel@tonic-gate tmpip.s_addr = ipdst.s_addr;
5350Sstevel@tonic-gate
5360Sstevel@tonic-gate result = mac_get_arp(&tmpip, (void *)&eh.ipoib_dest,
5370Sstevel@tonic-gate IPOIB_ADDRL, mac_state.mac_arp_timeout);
5380Sstevel@tonic-gate if (!result) {
5390Sstevel@tonic-gate errno = ETIMEDOUT;
5400Sstevel@tonic-gate dprintf("ibd_output: ARP request for %s "
5410Sstevel@tonic-gate "timed out.\n", inet_ntoa(tmpip));
5420Sstevel@tonic-gate return (-1);
5430Sstevel@tonic-gate }
5440Sstevel@tonic-gate }
5450Sstevel@tonic-gate }
5460Sstevel@tonic-gate
5470Sstevel@tonic-gate if (broadcast)
5480Sstevel@tonic-gate bcopy((caddr_t)&ibdbroadcastaddr, (caddr_t)&eh.ipoib_dest,
5490Sstevel@tonic-gate IPOIB_ADDRL);
5500Sstevel@tonic-gate
5510Sstevel@tonic-gate /* add the ibd header */
5520Sstevel@tonic-gate mp->b_rptr -= sizeof (eh);
5530Sstevel@tonic-gate bcopy((caddr_t)&eh, mp->b_rptr, sizeof (eh));
5540Sstevel@tonic-gate
5550Sstevel@tonic-gate #ifdef DEBUG
5560Sstevel@tonic-gate printf("ibd_output(%d): level(%d) frame(0x%x) len(%d)\n",
5570Sstevel@tonic-gate index, ogp->igm_level, mp->b_rptr, size);
5580Sstevel@tonic-gate #endif /* DEBUG */
5590Sstevel@tonic-gate
5600Sstevel@tonic-gate return (prom_write(mac_state.mac_dev, (char *)mp->b_rptr, size,
5610Sstevel@tonic-gate 0, NETWORK));
5620Sstevel@tonic-gate }
5630Sstevel@tonic-gate
5640Sstevel@tonic-gate void
ibd_init(void)5650Sstevel@tonic-gate ibd_init(void)
5660Sstevel@tonic-gate {
567*789Sahrens pnode_t chosen;
5680Sstevel@tonic-gate char *mtuprop = "ipib-frame-size";
5690Sstevel@tonic-gate char *bcastprop = "ipib-broadcast";
5700Sstevel@tonic-gate char *addrprop = "ipib-address";
5710Sstevel@tonic-gate char *cidprop = "client-id";
5720Sstevel@tonic-gate int cidlen;
5730Sstevel@tonic-gate uint8_t dhcpcid[DHCP_MAX_CID_LEN];
5740Sstevel@tonic-gate
5750Sstevel@tonic-gate mac_state.mac_addr_len = IPOIB_ADDRL;
5760Sstevel@tonic-gate mac_state.mac_addr_buf = bkmem_alloc(mac_state.mac_addr_len);
5770Sstevel@tonic-gate if (mac_state.mac_addr_buf == NULL)
5780Sstevel@tonic-gate prom_panic("ibd_init: Cannot allocate memory.");
5790Sstevel@tonic-gate
5800Sstevel@tonic-gate chosen = prom_finddevice("/chosen");
5810Sstevel@tonic-gate if (chosen == OBP_NONODE || chosen == OBP_BADNODE)
5820Sstevel@tonic-gate prom_panic("ibd_init: Cannot find /chosen.");
5830Sstevel@tonic-gate
5840Sstevel@tonic-gate if (prom_getprop(chosen, addrprop, (caddr_t)mac_state.mac_addr_buf) !=
5850Sstevel@tonic-gate IPOIB_ADDRL)
5860Sstevel@tonic-gate prom_panic("ibd_init: Cannot find /chosen:ipib-address\n.");
5870Sstevel@tonic-gate
5880Sstevel@tonic-gate if (prom_getprop(chosen, bcastprop, (caddr_t)&ibdbroadcastaddr) !=
5890Sstevel@tonic-gate IPOIB_ADDRL)
5900Sstevel@tonic-gate prom_panic("ibd_init: Cannot find /chosen:ipib-broadcast\n.");
5910Sstevel@tonic-gate
5920Sstevel@tonic-gate if (((cidlen = prom_getproplen(chosen, cidprop)) <= 0) ||
5930Sstevel@tonic-gate (cidlen > DHCP_MAX_CID_LEN) || (prom_getprop(chosen, cidprop,
5940Sstevel@tonic-gate (caddr_t)&dhcpcid) != cidlen))
5950Sstevel@tonic-gate prom_panic("ibd_init: Invalid /chosen:client-id\n.");
5960Sstevel@tonic-gate dhcp_set_client_id(dhcpcid, cidlen);
5970Sstevel@tonic-gate
5980Sstevel@tonic-gate /*
5990Sstevel@tonic-gate * Note that prom reports mtu including 20 bytes of
6000Sstevel@tonic-gate * addressing information.
6010Sstevel@tonic-gate */
6020Sstevel@tonic-gate if (prom_getprop(chosen, mtuprop,
6030Sstevel@tonic-gate (caddr_t)&mac_state.mac_mtu) <= 0)
6040Sstevel@tonic-gate mac_state.mac_mtu = IBDSIZE + IPOIB_ADDRL;
6050Sstevel@tonic-gate
6060Sstevel@tonic-gate /*
6070Sstevel@tonic-gate * Tell upper layers that we can support a little
6080Sstevel@tonic-gate * more. We will be taking off these 20 bytes at
6090Sstevel@tonic-gate * the start before we invoke prom_write() to send
6100Sstevel@tonic-gate * over the wire.
6110Sstevel@tonic-gate */
6120Sstevel@tonic-gate mac_state.mac_arp_timeout = IBD_ARP_TIMEOUT;
6130Sstevel@tonic-gate mac_state.mac_in_timeout = IBD_IN_TIMEOUT;
6140Sstevel@tonic-gate
6150Sstevel@tonic-gate mac_state.mac_arp = ibd_arp;
6160Sstevel@tonic-gate mac_state.mac_rarp = ibd_revarp;
6170Sstevel@tonic-gate mac_state.mac_header_len = ibd_header_len;
6180Sstevel@tonic-gate mac_state.mac_input = ibd_input;
6190Sstevel@tonic-gate mac_state.mac_output = ibd_output;
6200Sstevel@tonic-gate }
621