xref: /onnv-gate/usr/src/stand/lib/inet/ibd.c (revision 789:b348f31ed315)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
70Sstevel@tonic-gate  * with the License.
80Sstevel@tonic-gate  *
90Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate  * See the License for the specific language governing permissions
120Sstevel@tonic-gate  * and limitations under the License.
130Sstevel@tonic-gate  *
140Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate  *
200Sstevel@tonic-gate  * CDDL HEADER END
210Sstevel@tonic-gate  */
220Sstevel@tonic-gate /*
23*789Sahrens  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate #include <dhcp_impl.h>
300Sstevel@tonic-gate #include <sys/types.h>
310Sstevel@tonic-gate #include <socket_impl.h>
320Sstevel@tonic-gate #include <socket_inet.h>
330Sstevel@tonic-gate #include <sys/time.h>
340Sstevel@tonic-gate #include <sys/socket.h>
350Sstevel@tonic-gate #include <net/if.h>
360Sstevel@tonic-gate #include <net/if_arp.h>
370Sstevel@tonic-gate #include <netinet/in_systm.h>
380Sstevel@tonic-gate #include <netinet/in.h>
390Sstevel@tonic-gate #include <netinet/ip.h>
400Sstevel@tonic-gate #include <netinet/if_ether.h>
410Sstevel@tonic-gate #include <sys/promif.h>
420Sstevel@tonic-gate #include <sys/prom_plat.h>
430Sstevel@tonic-gate #include <sys/salib.h>
440Sstevel@tonic-gate #include <sys/bootdebug.h>
450Sstevel@tonic-gate #include <sys/ib/clients/ibd/ibd.h>
460Sstevel@tonic-gate 
470Sstevel@tonic-gate #include "ipv4.h"
480Sstevel@tonic-gate #include "dhcpv4.h"
490Sstevel@tonic-gate #include "ipv4_impl.h"
500Sstevel@tonic-gate #include "mac.h"
510Sstevel@tonic-gate #include "mac_impl.h"
520Sstevel@tonic-gate #include "ibd_inet.h"
530Sstevel@tonic-gate 
/*
 * ARP message body as used on IPoIB: the fixed ARP header followed by
 * the sender and target hardware (ipoib_mac_t) and 4-byte protocol
 * addresses. This structure is overlaid directly on packet buffers by
 * ibd_comarp() and ibd_input(), so the field order must not change.
 */
struct ibd_arp {
	struct arphdr	ea_hdr;		/* fixed-size header */
	ipoib_mac_t	arp_sha;	/* sender hardware address */
	uchar_t		arp_spa[4];	/* sender protocol address */
	ipoib_mac_t	arp_tha;	/* target hardware address */
	uchar_t		arp_tpa[4];	/* target protocol address */
};
610Sstevel@tonic-gate 
/* Error code reported to callers by ibd_input() and ibd_output(). */
extern int errno;
/* Link-level broadcast address, read from /chosen by ibd_init(). */
ipoib_mac_t ibdbroadcastaddr;
640Sstevel@tonic-gate 
650Sstevel@tonic-gate /*
660Sstevel@tonic-gate  * Assumptions about OBP behavior (refer FWARC 2002/702, 2003/251):
 * 1. prom_write() accepts the 20 byte destination address as the
 * first component in the send buffer. The buffer pointer points
 * to the start of this 20 byte address. The length parameter is
 * the IPoIB datagram size including the 20 bytes of destination
 * address.
720Sstevel@tonic-gate  * 2. OBP will not provide max-frame-size, since obp can only
730Sstevel@tonic-gate  * determine that by querying the IBA mcg, and thus the property
740Sstevel@tonic-gate  * has to be /chosen:ipib-frame-size. This will refer to the IPoIB
750Sstevel@tonic-gate  * link MTU as per section 4.0 of ietf i/d, ie, the 4 byte IPoIB
760Sstevel@tonic-gate  * header plus the IP payload mtu. Plus the 20 bytes of addressing
770Sstevel@tonic-gate  * information.
780Sstevel@tonic-gate  * 3. OBP will not provide mac-address property for IPoIB since there
790Sstevel@tonic-gate  * are built in assumptions about 6 byte address with that. Instead,
800Sstevel@tonic-gate  * /chosen:ipib-address will provide the local address.
 * 4. prom_read() returns 20 byte 0'ed filler followed by 4 byte
 * IPoIB header followed by IP payload. The return value is -2,
 * -1, 0, or the length of the received IPoIB datagram along with
 * the 20 bytes MBZ. The buffer pointer points to the start of
 * the 20 MBZ bytes. The length parameter reflects the max data
 * size that should be copied into the buffer including the 20
 * MBZ bytes.
880Sstevel@tonic-gate  * 5. OBP will not provide chosen-network-type, only
890Sstevel@tonic-gate  * network-interface-type = ipib. On an Infiniband device, this
900Sstevel@tonic-gate  * however does not guarantee that it is a network device.
910Sstevel@tonic-gate  * 6. OBP will provide the DHCP client id in /chosen:client-id.
920Sstevel@tonic-gate  * 7. /chosen:ipib-broadcast will provide the broadcast address.
930Sstevel@tonic-gate  * 8. OBP will validate that RARP is not being used before
940Sstevel@tonic-gate  * allowing boot to proceed to inetboot.
950Sstevel@tonic-gate  */
960Sstevel@tonic-gate 
/*
 * A complete ARP frame in the form exchanged with the prom: the
 * ipoib_ptxhdr_t pseudo header (destination address plus IPoIB header)
 * immediately followed by the ARP body.
 */
struct arp_packet {
	ipoib_ptxhdr_t		arp_eh;
	struct ibd_arp		arp_ea;
};
1010Sstevel@tonic-gate 
/*
 * Debug printf: prints only when RB_DEBUG is set in boothowto.
 *
 * Written as "if (!cond) ; else printf" rather than "if (cond) printf" so
 * that the expansion already owns its else branch. An "else" following a
 * dprintf() call therefore still pairs with the caller's own "if".
 */
#define	dprintf	if (!(boothowto & RB_DEBUG)) ; else printf
1030Sstevel@tonic-gate 
1040Sstevel@tonic-gate static char *
ibd_print(ipoib_mac_t * ea)1050Sstevel@tonic-gate ibd_print(ipoib_mac_t *ea)
1060Sstevel@tonic-gate {
1070Sstevel@tonic-gate 	unsigned char *macaddr = (unsigned char *)ea;
1080Sstevel@tonic-gate 	static char pbuf[(3 * IPOIB_ADDRL) + 1];
1090Sstevel@tonic-gate 	int i;
1100Sstevel@tonic-gate 	char *ptr = pbuf;
1110Sstevel@tonic-gate 
1120Sstevel@tonic-gate 	ptr = pbuf + sprintf(pbuf, "%x", *macaddr++);
1130Sstevel@tonic-gate 	for (i = 0; i < (IPOIB_ADDRL - 1); i++)
1140Sstevel@tonic-gate 		ptr += sprintf(ptr, ":%x", *macaddr++);
1150Sstevel@tonic-gate 	return (pbuf);
1160Sstevel@tonic-gate }
1170Sstevel@tonic-gate 
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate /*
1200Sstevel@tonic-gate  * Common ARP code. Broadcast the packet and wait for the right response.
1210Sstevel@tonic-gate  *
1220Sstevel@tonic-gate  * If arp is called for, caller expects a hardware address in the
1230Sstevel@tonic-gate  * source hardware address (sha) field of the "out" argument.
1240Sstevel@tonic-gate  *
1250Sstevel@tonic-gate  * IPoIB does not support RARP (see ibd_revarp()).
1260Sstevel@tonic-gate  *
1270Sstevel@tonic-gate  * Returns TRUE if transaction succeeded, FALSE otherwise.
1280Sstevel@tonic-gate  *
1290Sstevel@tonic-gate  * The timeout argument is the number of milliseconds to wait for a
1300Sstevel@tonic-gate  * response. An infinite timeout can be specified as 0xffffffff.
1310Sstevel@tonic-gate  */
1320Sstevel@tonic-gate static int
ibd_comarp(struct arp_packet * out,uint32_t timeout)1330Sstevel@tonic-gate ibd_comarp(struct arp_packet *out, uint32_t timeout)
1340Sstevel@tonic-gate {
1350Sstevel@tonic-gate 	struct arp_packet *in = (struct arp_packet *)mac_state.mac_buf;
1360Sstevel@tonic-gate 	int count, time, feedback, len, delay = 2;
1370Sstevel@tonic-gate 	char    *ind = "-\\|/";
1380Sstevel@tonic-gate 	struct in_addr tmp_ia;
1390Sstevel@tonic-gate 	uint32_t wait_time;
1400Sstevel@tonic-gate 
1410Sstevel@tonic-gate 	bcopy((caddr_t)&ibdbroadcastaddr, (caddr_t)&out->arp_eh.ipoib_dest,
1420Sstevel@tonic-gate 	    IPOIB_ADDRL);
1430Sstevel@tonic-gate 
1440Sstevel@tonic-gate 	out->arp_ea.arp_hrd =  htons(ARPHRD_IB);
1450Sstevel@tonic-gate 	out->arp_ea.arp_pro = htons(ETHERTYPE_IP);
1460Sstevel@tonic-gate 	out->arp_ea.arp_hln = IPOIB_ADDRL;
1470Sstevel@tonic-gate 	out->arp_ea.arp_pln = sizeof (struct in_addr);
1480Sstevel@tonic-gate 	bcopy(mac_state.mac_addr_buf, (caddr_t)&out->arp_ea.arp_sha,
1490Sstevel@tonic-gate 	    IPOIB_ADDRL);
1500Sstevel@tonic-gate 	ipv4_getipaddr(&tmp_ia);
1510Sstevel@tonic-gate 	tmp_ia.s_addr = htonl(tmp_ia.s_addr);
1520Sstevel@tonic-gate 	bcopy((caddr_t)&tmp_ia, (caddr_t)out->arp_ea.arp_spa,
1530Sstevel@tonic-gate 	    sizeof (struct in_addr));
1540Sstevel@tonic-gate 	feedback = 0;
1550Sstevel@tonic-gate 
1560Sstevel@tonic-gate 	wait_time = prom_gettime() + timeout;
1570Sstevel@tonic-gate 	for (count = 0; timeout == ~0U || prom_gettime() < wait_time; count++) {
1580Sstevel@tonic-gate 		if (count == IBD_WAITCNT) {
1590Sstevel@tonic-gate 			/*
1600Sstevel@tonic-gate 			 * Since IPoIB does not support RARP (see ibd_revarp),
1610Sstevel@tonic-gate 			 * we know that out->arp_ea.arp_op == ARPOP_REQUEST.
1620Sstevel@tonic-gate 			 */
1630Sstevel@tonic-gate 			bcopy((caddr_t)out->arp_ea.arp_tpa,
1640Sstevel@tonic-gate 			    (caddr_t)&tmp_ia, sizeof (struct in_addr));
1650Sstevel@tonic-gate 			printf("\nRequesting MAC address for: %s\n",
1660Sstevel@tonic-gate 			    inet_ntoa(tmp_ia));
1670Sstevel@tonic-gate 		}
1680Sstevel@tonic-gate 
1690Sstevel@tonic-gate 		(void) prom_write(mac_state.mac_dev, (caddr_t)out,
1700Sstevel@tonic-gate 		    sizeof (*out), 0, NETWORK);
1710Sstevel@tonic-gate 
1720Sstevel@tonic-gate 		if (count >= IBD_WAITCNT)
1730Sstevel@tonic-gate 			printf("%c\b", ind[feedback++ % 4]); /* activity */
1740Sstevel@tonic-gate 
1750Sstevel@tonic-gate 		time = prom_gettime() + (delay * 1000);	/* broadcast delay */
1760Sstevel@tonic-gate 		while (prom_gettime() <= time) {
1770Sstevel@tonic-gate 			len = prom_read(mac_state.mac_dev, mac_state.mac_buf,
1780Sstevel@tonic-gate 			    mac_state.mac_mtu, 0, NETWORK);
1790Sstevel@tonic-gate 			if (len < sizeof (struct arp_packet))
1800Sstevel@tonic-gate 				continue;
1810Sstevel@tonic-gate 			if (in->arp_ea.arp_pro != ntohs(ETHERTYPE_IP))
1820Sstevel@tonic-gate 				continue;
1830Sstevel@tonic-gate 			/*
1840Sstevel@tonic-gate 			 * Since IPoIB does not support RARP (see ibd_revarp),
1850Sstevel@tonic-gate 			 * we know that out->arp_ea.arp_op == ARPOP_REQUEST.
1860Sstevel@tonic-gate 			 */
1870Sstevel@tonic-gate 			if (in->arp_eh.ipoib_rhdr.ipoib_type !=
1880Sstevel@tonic-gate 			    ntohs(ETHERTYPE_ARP))
1890Sstevel@tonic-gate 				continue;
1900Sstevel@tonic-gate 			if (in->arp_ea.arp_op != ntohs(ARPOP_REPLY))
1910Sstevel@tonic-gate 				continue;
1920Sstevel@tonic-gate 			if (bcmp((caddr_t)in->arp_ea.arp_spa,
1930Sstevel@tonic-gate 			    (caddr_t)out->arp_ea.arp_tpa,
1940Sstevel@tonic-gate 			    sizeof (struct in_addr)) != 0)
1950Sstevel@tonic-gate 				continue;
1960Sstevel@tonic-gate 			if (boothowto & RB_VERBOSE) {
1970Sstevel@tonic-gate 				bcopy((caddr_t)in->arp_ea.arp_spa,
1980Sstevel@tonic-gate 				    (caddr_t)&tmp_ia,
1990Sstevel@tonic-gate 				    sizeof (struct in_addr));
2000Sstevel@tonic-gate 				printf("Found %s @ %s\n",
2010Sstevel@tonic-gate 				    inet_ntoa(tmp_ia),
2020Sstevel@tonic-gate 				    ibd_print(&in->arp_ea.arp_sha));
2030Sstevel@tonic-gate 			}
2040Sstevel@tonic-gate 			/* copy hardware addr into "out" for caller */
2050Sstevel@tonic-gate 			bcopy((caddr_t)&in->arp_ea.arp_sha,
2060Sstevel@tonic-gate 			    (caddr_t)&out->arp_ea.arp_sha, IPOIB_ADDRL);
2070Sstevel@tonic-gate 			return (TRUE);
2080Sstevel@tonic-gate 		}
2090Sstevel@tonic-gate 
2100Sstevel@tonic-gate 		delay = delay * 2;	/* Double the request delay */
2110Sstevel@tonic-gate 		if (delay > 64)		/* maximum delay is 64 seconds */
2120Sstevel@tonic-gate 			delay = 64;
2130Sstevel@tonic-gate 	}
2140Sstevel@tonic-gate 	return (FALSE);
2150Sstevel@tonic-gate }
2160Sstevel@tonic-gate 
2170Sstevel@tonic-gate /*
2180Sstevel@tonic-gate  * ARP client side
2190Sstevel@tonic-gate  * Broadcasts to determine MAC address given network order IP address.
2200Sstevel@tonic-gate  * See RFC 826
2210Sstevel@tonic-gate  *
2220Sstevel@tonic-gate  * Returns TRUE if successful, FALSE otherwise.
2230Sstevel@tonic-gate  */
2240Sstevel@tonic-gate static int
ibd_arp(struct in_addr * ip,void * hap,uint32_t timeout)2250Sstevel@tonic-gate ibd_arp(struct in_addr *ip, void *hap, uint32_t timeout)
2260Sstevel@tonic-gate {
2270Sstevel@tonic-gate 	ipoib_mac_t *ep = (ipoib_mac_t *)hap;
2280Sstevel@tonic-gate 	struct arp_packet out;
2290Sstevel@tonic-gate 	int result;
2300Sstevel@tonic-gate 
2310Sstevel@tonic-gate 	if (!initialized)
2320Sstevel@tonic-gate 		prom_panic("IPoIB device is not initialized.");
2330Sstevel@tonic-gate 
2340Sstevel@tonic-gate 	bzero((char *)&out, sizeof (struct arp_packet));
2350Sstevel@tonic-gate 
2360Sstevel@tonic-gate 	out.arp_eh.ipoib_rhdr.ipoib_type = htons(ETHERTYPE_ARP);
2370Sstevel@tonic-gate 	out.arp_ea.arp_op = htons(ARPOP_REQUEST);
2380Sstevel@tonic-gate 	bcopy((caddr_t)&ibdbroadcastaddr, (caddr_t)&out.arp_ea.arp_tha,
2390Sstevel@tonic-gate 	    IPOIB_ADDRL);
2400Sstevel@tonic-gate 	bcopy((caddr_t)ip, (caddr_t)out.arp_ea.arp_tpa,
2410Sstevel@tonic-gate 	    sizeof (struct in_addr));
2420Sstevel@tonic-gate 
2430Sstevel@tonic-gate 	result = ibd_comarp(&out, timeout);
2440Sstevel@tonic-gate 
2450Sstevel@tonic-gate 	if (result && (ep != NULL)) {
2460Sstevel@tonic-gate 		bcopy((caddr_t)&out.arp_ea.arp_sha, (caddr_t)ep, IPOIB_ADDRL);
2470Sstevel@tonic-gate 	}
2480Sstevel@tonic-gate 	return (result);
2490Sstevel@tonic-gate }
2500Sstevel@tonic-gate 
/*
 * Reverse ARP client side (RFC 903): would determine our Internet
 * address from our MAC address. RARP is not available on IPoIB, so
 * reaching this entry point is fatal and the boot is aborted.
 */
static void
ibd_revarp(void)
{
	prom_panic("IPoIB can not boot with RARP.");
}
2610Sstevel@tonic-gate 
2620Sstevel@tonic-gate /* ARGSUSED */
2630Sstevel@tonic-gate static int
ibd_header_len(struct inetgram * igm)2640Sstevel@tonic-gate ibd_header_len(struct inetgram *igm)
2650Sstevel@tonic-gate {
2660Sstevel@tonic-gate 	/*
2670Sstevel@tonic-gate 	 * We indicate to upper layers to leave enough space
2680Sstevel@tonic-gate 	 * in output buffers for filling in the IPoIB header
2690Sstevel@tonic-gate 	 * and the 20 byte destination address in ibd_output().
2700Sstevel@tonic-gate 	 */
2710Sstevel@tonic-gate 	return (IPOIB_HDRSIZE + IPOIB_ADDRL);
2720Sstevel@tonic-gate }
2730Sstevel@tonic-gate 
2740Sstevel@tonic-gate /*
2750Sstevel@tonic-gate  * Handle a IP datagram addressed to our MAC address or to the link
2760Sstevel@tonic-gate  * layer broadcast address. Also respond to ARP requests. Generates
2770Sstevel@tonic-gate  * inetgrams as long as there's data and the mac level IP timeout timer
2780Sstevel@tonic-gate  * hasn't expired. As soon as there is no data, we try for
2790Sstevel@tonic-gate  * IBD_INPUT_ATTEMPTS for more, then exit the loop, even if there is time
2800Sstevel@tonic-gate  * left, since we expect to have data waiting for us when we're called, we just
2810Sstevel@tonic-gate  * don't know how much.
2820Sstevel@tonic-gate  *
2830Sstevel@tonic-gate  * We workaround slow proms (some proms have hard sleeps for as much as 3msec)
2840Sstevel@tonic-gate  * even though there are is data waiting.
2850Sstevel@tonic-gate  *
2860Sstevel@tonic-gate  * Returns the total number of MEDIA_LVL frames placed on the socket.
2870Sstevel@tonic-gate  * Caller is expected to free up the inetgram resources.
2880Sstevel@tonic-gate  */
2890Sstevel@tonic-gate static int
ibd_input(int index)2900Sstevel@tonic-gate ibd_input(int index)
2910Sstevel@tonic-gate {
2920Sstevel@tonic-gate 	struct inetgram		*inp;
2930Sstevel@tonic-gate 	ipoib_ptxhdr_t		*eh;
2940Sstevel@tonic-gate 	int		frames = 0;	/* successful frames */
2950Sstevel@tonic-gate 	int		attempts = 0;	/* failed attempts after success */
2960Sstevel@tonic-gate 	int16_t		len = 0, data_len;
2970Sstevel@tonic-gate 	uint32_t	timeout, reltime;
2980Sstevel@tonic-gate 	uint32_t	pre_pr, post_pr; /* prom_read interval */
2990Sstevel@tonic-gate 
3000Sstevel@tonic-gate #ifdef	DEBUG
3010Sstevel@tonic-gate 	int		failures = 0;		/* total failures */
3020Sstevel@tonic-gate 	int		total_attempts = 0;	/* total prom_read */
3030Sstevel@tonic-gate 	int		no_data = 0;		/* no data in prom */
3040Sstevel@tonic-gate 	int		arps = 0;		/* arp requests processed */
3050Sstevel@tonic-gate 	uint32_t	tot_pr = 0;		/* prom_read time */
3060Sstevel@tonic-gate 	uint32_t	tot_pc = 0;		/* inetgram creation time */
3070Sstevel@tonic-gate 	uint32_t	pre_pc;
3080Sstevel@tonic-gate 	uint32_t	now;
3090Sstevel@tonic-gate #endif	/* DEBUG */
3100Sstevel@tonic-gate 
3110Sstevel@tonic-gate 	if (!initialized)
3120Sstevel@tonic-gate 		prom_panic("IPoIB device is not initialized.");
3130Sstevel@tonic-gate 
3140Sstevel@tonic-gate 	if ((reltime = sockets[index].in_timeout) == 0)
3150Sstevel@tonic-gate 		reltime = mac_state.mac_in_timeout;
3160Sstevel@tonic-gate 	timeout = prom_gettime() + reltime;
3170Sstevel@tonic-gate 
3180Sstevel@tonic-gate 	do {
3190Sstevel@tonic-gate 		if (frames > IBD_MAX_FRAMES) {
3200Sstevel@tonic-gate 			/* someone is trying a denial of service attack */
3210Sstevel@tonic-gate 			break;
3220Sstevel@tonic-gate 		}
3230Sstevel@tonic-gate 
3240Sstevel@tonic-gate 		/*
3250Sstevel@tonic-gate 		 * The following is being paranoid about possible bugs
3260Sstevel@tonic-gate 		 * where prom_read() returns a nonzero length, even when
3270Sstevel@tonic-gate 		 * it's not read a packet; it zeroes out the header to
3280Sstevel@tonic-gate 		 * compensate. Paranoia from calvin prom (V2) days.
3290Sstevel@tonic-gate 		 */
3300Sstevel@tonic-gate 		bzero(mac_state.mac_buf, sizeof (ipoib_ptxhdr_t));
3310Sstevel@tonic-gate 
3320Sstevel@tonic-gate 		/*
3330Sstevel@tonic-gate 		 * Prom_read() will return 0 or -2 if no data is present. A
3340Sstevel@tonic-gate 		 * return value of -1 means an error has occurred. We adjust
3350Sstevel@tonic-gate 		 * the timeout by calling the time spent in prom_read() "free".
3360Sstevel@tonic-gate 		 * prom_read() returns the number of bytes actually read, but
3370Sstevel@tonic-gate 		 * will only copy "len" bytes into our buffer. Adjust in
3380Sstevel@tonic-gate 		 * case the MTU is wrong.
3390Sstevel@tonic-gate 		 */
3400Sstevel@tonic-gate 		pre_pr = prom_gettime();
3410Sstevel@tonic-gate 		len = prom_read(mac_state.mac_dev, mac_state.mac_buf,
3420Sstevel@tonic-gate 		    mac_state.mac_mtu, 0, NETWORK);
3430Sstevel@tonic-gate 		post_pr = prom_gettime();
3440Sstevel@tonic-gate 		timeout += (post_pr - pre_pr);
3450Sstevel@tonic-gate #ifdef	DEBUG
3460Sstevel@tonic-gate 		tot_pr += (post_pr - pre_pr);
3470Sstevel@tonic-gate 		total_attempts++;
3480Sstevel@tonic-gate #endif	/* DEBUG */
3490Sstevel@tonic-gate 
3500Sstevel@tonic-gate 		if (len > mac_state.mac_mtu) {
3510Sstevel@tonic-gate 			dprintf("ibd_input: adjusting MTU %d -> %d\n",
3520Sstevel@tonic-gate 			    mac_state.mac_mtu, len);
3530Sstevel@tonic-gate 			bkmem_free(mac_state.mac_buf, mac_state.mac_mtu);
3540Sstevel@tonic-gate 			mac_state.mac_mtu = len;
3550Sstevel@tonic-gate 			mac_state.mac_buf = bkmem_alloc(mac_state.mac_mtu);
3560Sstevel@tonic-gate 			if (mac_state.mac_buf == NULL) {
3570Sstevel@tonic-gate 				prom_panic("ibd_input: Cannot reallocate "
3580Sstevel@tonic-gate 				    "netbuf memory.");
3590Sstevel@tonic-gate 			}
3600Sstevel@tonic-gate 			len = 0; /* pretend there was no data */
3610Sstevel@tonic-gate 		}
3620Sstevel@tonic-gate 
3630Sstevel@tonic-gate 		if (len == -1) {
3640Sstevel@tonic-gate #ifdef	DEBUG
3650Sstevel@tonic-gate 			failures++;
3660Sstevel@tonic-gate #endif	/* DEBUG */
3670Sstevel@tonic-gate 			break;
3680Sstevel@tonic-gate 		}
3690Sstevel@tonic-gate 		if (len == 0 || len == -2) {
3700Sstevel@tonic-gate 			if (frames != 0)
3710Sstevel@tonic-gate 				attempts++;
3720Sstevel@tonic-gate #ifdef	DEBUG
3730Sstevel@tonic-gate 			no_data++;
3740Sstevel@tonic-gate #endif	/* DEBUG */
3750Sstevel@tonic-gate 			continue;
3760Sstevel@tonic-gate 		}
3770Sstevel@tonic-gate 
3780Sstevel@tonic-gate 		eh = (ipoib_ptxhdr_t *)mac_state.mac_buf;
3790Sstevel@tonic-gate 		if (eh->ipoib_rhdr.ipoib_type == ntohs(ETHERTYPE_IP) &&
3800Sstevel@tonic-gate 		    len >= (sizeof (ipoib_ptxhdr_t) + sizeof (struct ip))) {
3810Sstevel@tonic-gate 
3820Sstevel@tonic-gate 			int offset;
3830Sstevel@tonic-gate #ifdef	DEBUG
3840Sstevel@tonic-gate 			pre_pc = prom_gettime();
3850Sstevel@tonic-gate #endif	/* DEBUG */
3860Sstevel@tonic-gate 
3870Sstevel@tonic-gate 			inp = (struct inetgram *)bkmem_zalloc(
3880Sstevel@tonic-gate 			    sizeof (struct inetgram));
3890Sstevel@tonic-gate 			if (inp == NULL) {
3900Sstevel@tonic-gate 				errno = ENOMEM;
3910Sstevel@tonic-gate 				return (frames == 0 ? -1 : frames);
3920Sstevel@tonic-gate 			}
3930Sstevel@tonic-gate 			offset = sizeof (ipoib_ptxhdr_t);
3940Sstevel@tonic-gate 			data_len = len - offset;
3950Sstevel@tonic-gate 			inp->igm_mp = allocb(data_len, 0);
3960Sstevel@tonic-gate 			if (inp->igm_mp == NULL) {
3970Sstevel@tonic-gate 				errno = ENOMEM;
3980Sstevel@tonic-gate 				bkmem_free((caddr_t)inp,
3990Sstevel@tonic-gate 				    sizeof (struct inetgram));
4000Sstevel@tonic-gate 				return (frames == 0 ? -1 : frames);
4010Sstevel@tonic-gate 			}
4020Sstevel@tonic-gate 			bcopy((caddr_t)(mac_state.mac_buf + offset),
4030Sstevel@tonic-gate 			    inp->igm_mp->b_rptr, data_len);
4040Sstevel@tonic-gate 			inp->igm_mp->b_wptr += data_len;
4050Sstevel@tonic-gate 			inp->igm_level = NETWORK_LVL;
4060Sstevel@tonic-gate 			add_grams(&sockets[index].inq, inp);
4070Sstevel@tonic-gate 			frames++;
4080Sstevel@tonic-gate 			attempts = 0;
4090Sstevel@tonic-gate #ifdef	DEBUG
4100Sstevel@tonic-gate 			tot_pc += prom_gettime() - pre_pc;
4110Sstevel@tonic-gate #endif	/* DEBUG */
4120Sstevel@tonic-gate 			continue;
4130Sstevel@tonic-gate 		}
4140Sstevel@tonic-gate 
4150Sstevel@tonic-gate 		if (eh->ipoib_rhdr.ipoib_type == ntohs(ETHERTYPE_ARP) &&
4160Sstevel@tonic-gate 		    len >= sizeof (struct arp_packet)) {
4170Sstevel@tonic-gate 
4180Sstevel@tonic-gate 			struct in_addr		ip;
4190Sstevel@tonic-gate 			struct ibd_arp		*ea;
4200Sstevel@tonic-gate 
4210Sstevel@tonic-gate #ifdef	DEBUG
4220Sstevel@tonic-gate 			printf("ibd_input: ARP message received\n");
4230Sstevel@tonic-gate 			arps++;
4240Sstevel@tonic-gate #endif	/* DEBUG */
4250Sstevel@tonic-gate 
4260Sstevel@tonic-gate 			ea = (struct ibd_arp *)(mac_state.mac_buf +
4270Sstevel@tonic-gate 			    sizeof (ipoib_ptxhdr_t));
4280Sstevel@tonic-gate 			if (ea->arp_pro != ntohs(ETHERTYPE_IP))
4290Sstevel@tonic-gate 				continue;
4300Sstevel@tonic-gate 
4310Sstevel@tonic-gate 			ipv4_getipaddr(&ip);
4320Sstevel@tonic-gate 			ip.s_addr = ntohl(ip.s_addr);
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate 			if (ea->arp_op == ntohs(ARPOP_REQUEST) &&
4350Sstevel@tonic-gate 			    ip.s_addr != INADDR_ANY &&
4360Sstevel@tonic-gate 			    (bcmp((caddr_t)ea->arp_tpa, (caddr_t)&ip,
4370Sstevel@tonic-gate 			    sizeof (struct in_addr)) == 0)) {
4380Sstevel@tonic-gate 				ea->arp_op = htons(ARPOP_REPLY);
4390Sstevel@tonic-gate 				bcopy((caddr_t)&ea->arp_sha,
4400Sstevel@tonic-gate 				    (caddr_t)&eh->ipoib_dest, IPOIB_ADDRL);
4410Sstevel@tonic-gate 				bcopy((caddr_t)&ea->arp_sha,
4420Sstevel@tonic-gate 				    (caddr_t)&ea->arp_tha, IPOIB_ADDRL);
4430Sstevel@tonic-gate 				bcopy((caddr_t)ea->arp_spa,
4440Sstevel@tonic-gate 				    (caddr_t)ea->arp_tpa,
4450Sstevel@tonic-gate 				    sizeof (struct in_addr));
4460Sstevel@tonic-gate 				bcopy(mac_state.mac_addr_buf,
4470Sstevel@tonic-gate 				    (caddr_t)&ea->arp_sha,
4480Sstevel@tonic-gate 				    mac_state.mac_addr_len);
4490Sstevel@tonic-gate 				bcopy((caddr_t)&ip, (caddr_t)ea->arp_spa,
4500Sstevel@tonic-gate 				    sizeof (struct in_addr));
4510Sstevel@tonic-gate 				(void) prom_write(mac_state.mac_dev,
4520Sstevel@tonic-gate 				    mac_state.mac_buf,
4530Sstevel@tonic-gate 				    sizeof (struct arp_packet), 0, NETWORK);
4540Sstevel@tonic-gate 				/* don't charge for ARP replies */
4550Sstevel@tonic-gate 				timeout += reltime;
4560Sstevel@tonic-gate 			}
4570Sstevel@tonic-gate 		}
4580Sstevel@tonic-gate 	} while (attempts < IBD_INPUT_ATTEMPTS &&
4590Sstevel@tonic-gate #ifdef	DEBUG
4600Sstevel@tonic-gate 	    (now = prom_gettime()) < timeout);
4610Sstevel@tonic-gate #else
4620Sstevel@tonic-gate 	    prom_gettime() < timeout);
4630Sstevel@tonic-gate #endif	/* DEBUG */
4640Sstevel@tonic-gate 
4650Sstevel@tonic-gate #ifdef	DEBUG
4660Sstevel@tonic-gate 	printf("ibd_input(%d): T/S/N/A/F/P/M: %d/%d/%d/%d/%d/%d/%d "
4670Sstevel@tonic-gate 	    "T/O: %d < %d = %s\n", index, total_attempts, frames, no_data,
4680Sstevel@tonic-gate 	    arps, failures, tot_pr, tot_pc, now, timeout,
4690Sstevel@tonic-gate 	    (now < timeout) ? "TRUE" : "FALSE");
4700Sstevel@tonic-gate #endif	/* DEBUG */
4710Sstevel@tonic-gate 	return (frames);
4720Sstevel@tonic-gate }
4730Sstevel@tonic-gate 
4740Sstevel@tonic-gate /*
4750Sstevel@tonic-gate  * Send out an IPoIB datagram. We expect a IP frame appropriately fragmented
4760Sstevel@tonic-gate  * at this level.
4770Sstevel@tonic-gate  *
4780Sstevel@tonic-gate  * Errno is set and -1 is returned if an error occurs. Number of bytes sent
4790Sstevel@tonic-gate  * is returned on success.
4800Sstevel@tonic-gate  */
4810Sstevel@tonic-gate /* ARGSUSED */
4820Sstevel@tonic-gate static int
ibd_output(int index,struct inetgram * ogp)4830Sstevel@tonic-gate ibd_output(int index, struct inetgram *ogp)
4840Sstevel@tonic-gate {
4850Sstevel@tonic-gate 	int			header_len, result;
4860Sstevel@tonic-gate 	ipoib_ptxhdr_t		eh;
4870Sstevel@tonic-gate 	struct ip		*ip;
4880Sstevel@tonic-gate 	struct in_addr		tmpip, ipdst;
4890Sstevel@tonic-gate 	int			broadcast = FALSE;
4900Sstevel@tonic-gate 	int			size;
4910Sstevel@tonic-gate 	mblk_t			*mp;
4920Sstevel@tonic-gate 
4930Sstevel@tonic-gate 	if (!initialized)
4940Sstevel@tonic-gate 		prom_panic("IPoIB device is not initialized.");
4950Sstevel@tonic-gate 
4960Sstevel@tonic-gate 	if (ogp->igm_level != MEDIA_LVL) {
4970Sstevel@tonic-gate 		dprintf("ibd_output: frame type wrong: socket: %d\n",
4980Sstevel@tonic-gate 		    index * SOCKETTYPE);
4990Sstevel@tonic-gate 		errno = EINVAL;
5000Sstevel@tonic-gate 		return (-1);
5010Sstevel@tonic-gate 	}
5020Sstevel@tonic-gate 
5030Sstevel@tonic-gate 	header_len = IPOIB_HDRSIZE + IPOIB_ADDRL;
5040Sstevel@tonic-gate 	mp = ogp->igm_mp;
5050Sstevel@tonic-gate 	size = mp->b_wptr - mp->b_rptr;
5060Sstevel@tonic-gate 	if (size > (mac_state.mac_mtu - IPOIB_ADDRL)) {
5070Sstevel@tonic-gate 		dprintf("ibd_output: frame size too big: %d\n", size);
5080Sstevel@tonic-gate 		errno = E2BIG;
5090Sstevel@tonic-gate 		return (-1);
5100Sstevel@tonic-gate 	}
5110Sstevel@tonic-gate 
5120Sstevel@tonic-gate 	size += header_len;
5130Sstevel@tonic-gate 	ip = (struct ip *)(mp->b_rptr);
5140Sstevel@tonic-gate 
5150Sstevel@tonic-gate 	eh.ipoib_rhdr.ipoib_type = htons(ETHERTYPE_IP);
5160Sstevel@tonic-gate 	eh.ipoib_rhdr.ipoib_mbz = 0;
5170Sstevel@tonic-gate 	bcopy((caddr_t)&ip->ip_dst, (caddr_t)&ipdst, sizeof (ipdst));
5180Sstevel@tonic-gate 
5190Sstevel@tonic-gate 	if (ipdst.s_addr == htonl(INADDR_BROADCAST))
5200Sstevel@tonic-gate 		broadcast = TRUE; /* limited broadcast */
5210Sstevel@tonic-gate 
5220Sstevel@tonic-gate 	if (!broadcast) {
5230Sstevel@tonic-gate 		struct in_addr mask;
5240Sstevel@tonic-gate 
5250Sstevel@tonic-gate 		ipv4_getnetmask(&mask);
5260Sstevel@tonic-gate 		mask.s_addr = htonl(mask.s_addr);
5270Sstevel@tonic-gate 		if (mask.s_addr != htonl(INADDR_BROADCAST) &&
5280Sstevel@tonic-gate 		    (ipdst.s_addr & ~mask.s_addr) == 0) {
5290Sstevel@tonic-gate 			broadcast = TRUE; /* directed broadcast */
5300Sstevel@tonic-gate 		} else {
5310Sstevel@tonic-gate 			if (ogp->igm_router.s_addr != htonl(INADDR_ANY))
5320Sstevel@tonic-gate 				tmpip.s_addr = ogp->igm_router.s_addr;
5330Sstevel@tonic-gate 			else
5340Sstevel@tonic-gate 				tmpip.s_addr = ipdst.s_addr;
5350Sstevel@tonic-gate 
5360Sstevel@tonic-gate 			result = mac_get_arp(&tmpip, (void *)&eh.ipoib_dest,
5370Sstevel@tonic-gate 			    IPOIB_ADDRL, mac_state.mac_arp_timeout);
5380Sstevel@tonic-gate 			if (!result) {
5390Sstevel@tonic-gate 				errno = ETIMEDOUT;
5400Sstevel@tonic-gate 				dprintf("ibd_output: ARP request for %s "
5410Sstevel@tonic-gate 				    "timed out.\n", inet_ntoa(tmpip));
5420Sstevel@tonic-gate 				return (-1);
5430Sstevel@tonic-gate 			}
5440Sstevel@tonic-gate 		}
5450Sstevel@tonic-gate 	}
5460Sstevel@tonic-gate 
5470Sstevel@tonic-gate 	if (broadcast)
5480Sstevel@tonic-gate 		bcopy((caddr_t)&ibdbroadcastaddr, (caddr_t)&eh.ipoib_dest,
5490Sstevel@tonic-gate 		    IPOIB_ADDRL);
5500Sstevel@tonic-gate 
5510Sstevel@tonic-gate 	/* add the ibd header */
5520Sstevel@tonic-gate 	mp->b_rptr -= sizeof (eh);
5530Sstevel@tonic-gate 	bcopy((caddr_t)&eh, mp->b_rptr, sizeof (eh));
5540Sstevel@tonic-gate 
5550Sstevel@tonic-gate #ifdef	DEBUG
5560Sstevel@tonic-gate 	printf("ibd_output(%d): level(%d) frame(0x%x) len(%d)\n",
5570Sstevel@tonic-gate 	    index, ogp->igm_level, mp->b_rptr, size);
5580Sstevel@tonic-gate #endif	/* DEBUG */
5590Sstevel@tonic-gate 
5600Sstevel@tonic-gate 	return (prom_write(mac_state.mac_dev, (char *)mp->b_rptr, size,
5610Sstevel@tonic-gate 	    0, NETWORK));
5620Sstevel@tonic-gate }
5630Sstevel@tonic-gate 
5640Sstevel@tonic-gate void
ibd_init(void)5650Sstevel@tonic-gate ibd_init(void)
5660Sstevel@tonic-gate {
567*789Sahrens 	pnode_t	chosen;
5680Sstevel@tonic-gate 	char	*mtuprop = "ipib-frame-size";
5690Sstevel@tonic-gate 	char	*bcastprop = "ipib-broadcast";
5700Sstevel@tonic-gate 	char	*addrprop = "ipib-address";
5710Sstevel@tonic-gate 	char	*cidprop = "client-id";
5720Sstevel@tonic-gate 	int	cidlen;
5730Sstevel@tonic-gate 	uint8_t	dhcpcid[DHCP_MAX_CID_LEN];
5740Sstevel@tonic-gate 
5750Sstevel@tonic-gate 	mac_state.mac_addr_len = IPOIB_ADDRL;
5760Sstevel@tonic-gate 	mac_state.mac_addr_buf = bkmem_alloc(mac_state.mac_addr_len);
5770Sstevel@tonic-gate 	if (mac_state.mac_addr_buf == NULL)
5780Sstevel@tonic-gate 		prom_panic("ibd_init: Cannot allocate memory.");
5790Sstevel@tonic-gate 
5800Sstevel@tonic-gate 	chosen = prom_finddevice("/chosen");
5810Sstevel@tonic-gate 	if (chosen == OBP_NONODE || chosen == OBP_BADNODE)
5820Sstevel@tonic-gate 		prom_panic("ibd_init: Cannot find /chosen.");
5830Sstevel@tonic-gate 
5840Sstevel@tonic-gate 	if (prom_getprop(chosen, addrprop, (caddr_t)mac_state.mac_addr_buf) !=
5850Sstevel@tonic-gate 	    IPOIB_ADDRL)
5860Sstevel@tonic-gate 		prom_panic("ibd_init: Cannot find /chosen:ipib-address\n.");
5870Sstevel@tonic-gate 
5880Sstevel@tonic-gate 	if (prom_getprop(chosen, bcastprop, (caddr_t)&ibdbroadcastaddr) !=
5890Sstevel@tonic-gate 	    IPOIB_ADDRL)
5900Sstevel@tonic-gate 		prom_panic("ibd_init: Cannot find /chosen:ipib-broadcast\n.");
5910Sstevel@tonic-gate 
5920Sstevel@tonic-gate 	if (((cidlen = prom_getproplen(chosen, cidprop)) <= 0) ||
5930Sstevel@tonic-gate 	    (cidlen > DHCP_MAX_CID_LEN) || (prom_getprop(chosen, cidprop,
5940Sstevel@tonic-gate 	    (caddr_t)&dhcpcid) != cidlen))
5950Sstevel@tonic-gate 		prom_panic("ibd_init: Invalid /chosen:client-id\n.");
5960Sstevel@tonic-gate 	dhcp_set_client_id(dhcpcid, cidlen);
5970Sstevel@tonic-gate 
5980Sstevel@tonic-gate 	/*
5990Sstevel@tonic-gate 	 * Note that prom reports mtu including 20 bytes of
6000Sstevel@tonic-gate 	 * addressing information.
6010Sstevel@tonic-gate 	 */
6020Sstevel@tonic-gate 	if (prom_getprop(chosen, mtuprop,
6030Sstevel@tonic-gate 	    (caddr_t)&mac_state.mac_mtu) <= 0)
6040Sstevel@tonic-gate 		mac_state.mac_mtu = IBDSIZE + IPOIB_ADDRL;
6050Sstevel@tonic-gate 
6060Sstevel@tonic-gate 	/*
6070Sstevel@tonic-gate 	 * Tell upper layers that we can support a little
6080Sstevel@tonic-gate 	 * more. We will be taking off these 20 bytes at
6090Sstevel@tonic-gate 	 * the start before we invoke prom_write() to send
6100Sstevel@tonic-gate 	 * over the wire.
6110Sstevel@tonic-gate 	 */
6120Sstevel@tonic-gate 	mac_state.mac_arp_timeout = IBD_ARP_TIMEOUT;
6130Sstevel@tonic-gate 	mac_state.mac_in_timeout = IBD_IN_TIMEOUT;
6140Sstevel@tonic-gate 
6150Sstevel@tonic-gate 	mac_state.mac_arp = ibd_arp;
6160Sstevel@tonic-gate 	mac_state.mac_rarp = ibd_revarp;
6170Sstevel@tonic-gate 	mac_state.mac_header_len = ibd_header_len;
6180Sstevel@tonic-gate 	mac_state.mac_input = ibd_input;
6190Sstevel@tonic-gate 	mac_state.mac_output = ibd_output;
6200Sstevel@tonic-gate }
621