xref: /plan9/sys/src/9/pcboot/pxeload.c (revision 07b4782c30a417782adf37507cd9a64239a8e687)
1 /*
2  * 9boot - load next kernel via pxe (bootp, tftp) and start it
3  *
4  * intel says that pxe can only load into the bottom 640K,
5  * and intel's boot agent takes 128K, leaving only 512K for 9boot.
6  *
7  * some of this code is from the old 9load's bootp.c.
8  */
9 #include	"u.h"
10 #include	"../port/lib.h"
11 #include	"mem.h"
12 #include	"dat.h"
13 #include	"fns.h"
14 #include	"io.h"
15 #include	"ureg.h"
16 #include	"pool.h"
17 #include	"../port/netif.h"
18 #include	"etherif.h"
19 #include	"../ip/ip.h"
20 #include	"pxe.h"
21 
22 #define TFTPDEF "135.104.9.6"	/* IP of default tftp server */
23 
enum {
	/* compile-time switches */
	Debug =		0,	/* print debugging chatter? */
	Tftpusehdrs =	0,	/* use announce+headers for tftp? */

	/* tftp packet geometry */
	Tftphdrsz =	4,	/* size of Tftp.header */
	/*
	 * the preferred segment size may exceed the ether mtu;
	 * ip fragmentation copes with that, at least on v4.
	 */
	Prefsegsize =	1400,
	Maxsegsize =	2048,	/* size of Tftp.data */
	Bufsz =		Maxsegsize + 2,
};
37 
38 typedef struct Ethaddr Ethaddr;
39 typedef struct Kernname Kernname;
40 typedef struct Openeth Openeth;
41 typedef struct Tftp Tftp;
42 
43 struct Tftp {
44 	uchar	header[Tftphdrsz];
45 	uchar	data[Maxsegsize];
46 };
47 
48 struct Kernname {
49 	char	*edev;
50 	char	*bootfile;
51 };
52 
53 struct Openeth {
54 	/* names */
55 	int	ctlrno;
56 	char	ethname[16];	/* ether%d */
57 	char	netethname[32];	/* /net/ether%d */
58 	char	filename[128];	/* from bootp, for tftp */
59 
60 	Chan	*ifcctl;	/* /net/ipifc/clone */
61 	Chan	*ethctl;	/* /net/etherN/0/ctl, for promiscuous mode */
62 
63 	/* udp connection */
64 	Chan	*udpctl;
65 	Chan	*udpdata;
66 	Pxenetaddr *netaddr;
67 	int	rxactive;
68 };
69 
70 struct Ethaddr {		/* communication with sleep procs */
71 	Openeth	*oe;
72 	Pxenetaddr *a;
73 };
74 
75 static char ethernm[] = "ether";
76 
77 /*
78  * there can be at most one concurrent tftp session until we move these
79  * variables into Openeth or some other struct.
80  */
81 static ushort tftpport;
82 static int tftpblockno;
83 static int tftpphase;
84 static int progress;
85 static int segsize;
86 static Tftp *tftpb;
87 
88 static uchar myea[Eaddrlen];
89 static Pxenetaddr myaddr;		/* actually, local ip addr & port */
90 static Pxenetaddr tftpserv;		/* actually, remote ip addr & port */
91 static Pxenetaddr bootpserv;
92 
93 uchar *
94 etheraddr(Openeth *oe)
95 {
96 	int n;
97 	char name[32], buf[32];
98 	static uchar ea[Eaddrlen];
99 
100 	memset(ea, 0, sizeof ea);
101 	snprint(name, sizeof name, "#l%d/ether%d/addr", oe->ctlrno, oe->ctlrno);
102 	n = readfile(name, buf, sizeof buf - 1);
103 	if (n < 0)
104 		return ea;
105 	buf[n] = '\0';
106 	parseether(ea, buf);
107 	return ea;
108 }
109 
110 static void
111 udpsend(Openeth *oe, Pxenetaddr *a, void *data, int dlen)
112 {
113 	int n;
114 	uchar *buf;
115 	Chan *c;
116 	Etherpkt pkt;
117 	Udphdr *uh;
118 
119 	buf = data;
120 	if (dlen > sizeof pkt)
121 		panic("udpsend: packet too big");
122 
123 	oe->netaddr = a;
124 	/*
125 	 * add Plan 9 UDP pseudo-headers
126 	 */
127 	if (!tftpphase || Tftpusehdrs) {
128 		memset(&pkt, 0, sizeof pkt);
129 		uh = (Udphdr*)&pkt;
130 		memmove(uh + 1, data, dlen);
131 		USED(buf);
132 		buf = (uchar *)uh;
133 		dlen += sizeof *uh;
134 		if (dlen > sizeof pkt)
135 			panic("udpsend: packet too big");
136 
137 		ipmove(uh->laddr, myaddr.ip);
138 		hnputs(uh->lport, myaddr.port);
139 		ipmove(uh->raddr, a->ip);
140 		hnputs(uh->rport, a->port);
141 		if(Debug)
142 			print("udpsend %I!%d -> %I!%d ", uh->laddr,
143 				nhgets(uh->lport), uh->raddr, nhgets(uh->rport));
144 	}
145 	if (waserror()) {
146 		iprint("udp write error\n");
147 		return;			/* send another req later */
148 	}
149 	c = oe->udpdata;
150 	assert(oe->udpdata != nil);
151 	n = devtab[c->type]->write(c, buf, dlen, c->offset);
152 	poperror();
153 	c->offset += n;
154 	if (n != dlen)
155 		print("udpsend: wrote %d/%d\n", n, dlen);
156 	else if (progress)
157 		print(".");
158 }
159 
160 static void
161 nak(Openeth *oe, Pxenetaddr *a, int code, char *msg, int report)
162 {
163 	char buf[4 + 32];
164 
165 	buf[0] = 0;
166 	buf[1] = Tftp_ERROR;
167 	buf[2] = 0;
168 	buf[3] = code;
169 	strncpy(buf+4, msg, sizeof buf - 4 - 1);
170 	udpsend(oe, a, buf, 4 + strlen(buf+4) + 1);
171 	if(report)
172 		print("\ntftp: error(%d): %s\n", code, msg);
173 }
174 
175 /* a is the source address we're looking for */
176 static int
177 tuplematch(Pxenetaddr *a, Udphdr *h)
178 {
179 	int port;
180 	uchar *ip;
181 
182 	if (tftpphase && !Tftpusehdrs)
183 		return 1;
184 	/*
185 	 * we're using udp headers mode, because we're still doing bootp,
186 	 * or we are doing tftp and we chose to use headers mode.
187 	 */
188 	port = a->port;
189 	ip = a->ip;
190 	/*
191 	 * we're accepting any src port or it's from the port we want, and
192 	 * it's from the ip we want or we sent to a broadcast address, and
193 	 * it's for us or it's a broadcast.
194 	 */
195 	return (port == 0 || nhgets(h->rport) == port) &&
196 		(equivip6(h->raddr, ip) || equivip6(ip, IPv4bcast)) &&
197 		(equivip6(h->laddr, myaddr.ip) || equivip6(h->laddr, IPv4bcast));
198 }
199 
200 /* extract UDP payload into data and set a */
201 static int
202 udppayload(Udphdr *h, int len, Pxenetaddr *a, uchar *data, int dlen)
203 {
204 	if(Debug)
205 		print("udprecv %I!%d to %I!%d...\n",
206 			h->raddr, nhgets(h->rport), h->laddr, nhgets(h->lport));
207 
208 	if(a->port != 0 && nhgets(h->rport) != a->port) {
209 		if(Debug)
210 			print("udpport %ux not %ux\n", nhgets(h->rport), a->port);
211 		return -1;
212 	}
213 
214 	if(!equivip6(a->ip, IPv4bcast) && !equivip6(a->ip, h->raddr)) {
215 		if(Debug)
216 			print("bad ip %I not %I\n", h->raddr, a->ip);
217 		return -1;
218 	}
219 
220 	len -= sizeof *h;		/* don't count pseudo-headers */
221 	if(len > dlen) {
222 		print("udp packet too big: %d > %d; from addr %I\n",
223 			len, dlen, h->raddr);
224 		return -1;
225 	}
226 	memmove(data, h + 1, len);	/* skip pseudo-headers */
227 
228 	/* set a from remote address */
229 	ipmove(a->ip, h->raddr);
230 	a->port = nhgets(h->rport);
231 	return len;
232 }
233 
234 static int
235 chanlen(Chan *ch)
236 {
237 	int len;
238 	Dir *dp;
239 
240 	dp = dirchstat(ch);
241 	if (dp == nil)
242 		return -1;
243 	len = dp->length;		/* qlen(cv->rq) in devip */
244 	free(dp);
245 	return len;
246 }
247 
248 static int
249 udprecv(Openeth *oe, Pxenetaddr *a, void *data, int dlen)
250 {
251 	int len, buflen, chlen;
252 	ulong timo, now;
253 	char *buf;
254 	Chan *c;
255 	Etherpkt pkt;
256 
257 	oe->netaddr = a;
258 	/* timo is frequency of tftp ack and broadcast bootp retransmission */
259 	if(oe->rxactive == 0)
260 		timo = 1000;
261 	else
262 		timo = Timeout;
263 	now = TK2MS(m->ticks);
264 	timo += now;			/* deadline */
265 
266 	c = oe->udpdata;
267 	spllo();			/* paranoia */
268 	do {
269 		/*
270 		 * wait for data to arrive or time-out.
271 		 * alarms only work for user procs, so we poll to avoid getting
272 		 * stuck in ipread.
273 		 */
274 		for (chlen = chanlen(c); chlen == 0 && now < timo;
275 		     chlen = chanlen(c)) {
276 			/* briefly give somebody else a chance to run */
277 			tsleep(&up->sleep, return0, 0, 0);
278 			now = TK2MS(m->ticks);
279 		}
280 		if (chlen <= 0) {
281 			print("T");
282 			return -1;		/* timed out */
283 		}
284 
285 		while (waserror()) {
286 			print("read err: %s\n", up->errstr);
287 			tsleep(&up->sleep, return0, 0, 1000);
288 		}
289 
290 		/*
291 		 * using Plan 9 UDP pseudo-headers?
292 		 */
293 		if (tftpphase && !Tftpusehdrs) {
294 			buf = data;	/* read directly in caller's buffer */
295 			buflen = dlen;
296 		} else {
297 			buf = (char *)&pkt;  /* read pkt with hdrs */
298 			buflen = sizeof pkt;
299 		}
300 		/* devtab[c->type]->read calls ipread */
301 		len = devtab[c->type]->read(c, buf, buflen, c->offset);
302 		poperror();
303 
304 		if (len <= 0)
305 			return len;
306 		c->offset += len;
307 	} while (!tuplematch(oe->netaddr, (Udphdr *)buf));
308 
309 	/*
310 	 * using Plan 9 UDP pseudo-headers? extract payload into caller's buf.
311 	 */
312 	if (!tftpphase || Tftpusehdrs)
313 		len = udppayload((Udphdr *)&pkt, len, a, data, dlen);
314 	if (len >= 0)
315 		oe->rxactive = 1;
316 	return len;
317 }
318 
319 static void
320 ack(Openeth *oe, Pxenetaddr *a, int blkno)
321 {
322 	char buf[4];
323 
324 	buf[0] = 0;
325 	buf[1] = Tftp_ACK;
326 	buf[2] = blkno>>8;
327 	buf[3] = blkno;
328 	udpsend(oe, a, buf, sizeof buf);
329 }
330 
/* return a pointer just past the NUL that terminates the string at wd */
static char *
skipwd(char *wd)
{
	return wd + strlen(wd) + 1;
}
338 
/*
 * look up option opt (case-insensitively) among the NUL-terminated
 * name/value string pairs in the len bytes at pkt and return its value
 * as a decimal number.  returns -1 if opt is absent or if a name or
 * value isn't terminated within the packet; we never look beyond
 * pkt+len, since the contents come from the network.
 */
static int
optval(char *opt, char *pkt, int len)
{
	char *name, *val, *end, *ep;

	ep = pkt + len;
	for (name = pkt; name < ep && *name != '\0'; name = end + 1) {
		/* find the end of the option name */
		for (val = name; val < ep && *val != '\0'; val++)
			;
		if (val >= ep)
			return -1;	/* unterminated name */
		val++;

		/* find the end of the option value */
		for (end = val; end < ep && *end != '\0'; end++)
			;
		if (end >= ep)
			return -1;	/* missing or unterminated value */

		if (cistrcmp(name, opt) == 0)
			return strtol(val, 0, 10);
	}
	return -1;
}
352 
353 /*
354  * send a tftp read request to `a' for name.  if we get a data packet back,
355  * ack it and stash it in tftp for later.
356  *
357  * format of a request packet, from the RFC:
358  *
359  *          2 bytes     string    1 byte     string   1 byte
360  *          ------------------------------------------------
361  *         | Opcode |  Filename  |   0  |    Mode    |   0  |
362  *          ------------------------------------------------
363  */
364 static int
365 tftpread1st(Openeth *oe, Pxenetaddr *a, char *name, Tftp *tftp)
366 {
367 	int i, n, len, rlen, oport, sendack;
368 	static char *buf;
369 
370 	if (buf == nil)
371 		buf = malloc(Bufsz);
372 	buf[0] = 0;
373 	buf[1] = Tftp_READ;
374 	len = 2 + snprint(buf+2, Bufsz - 2, "%s", name) + 1;
375 	len += snprint(buf+len, Bufsz - len, "octet") + 1;
376 	len += snprint(buf+len, Bufsz - len, "blksize") + 1; /* option */
377 	len += snprint(buf+len, Bufsz - len, "%d", Prefsegsize) + 1;
378 
379 	/*
380 	 * keep sending the same packet until we get an answer.
381 	 */
382 	if (Debug)
383 		print("tftpread1st %s\n", name);
384 	oe->netaddr = a;
385 	/*
386 	 * the first packet or two sent seem to get dropped,
387 	 * so use a shorter time-out on the first packet.
388 	 */
389 	oe->rxactive = 0;
390 	oport = a->port;
391 	tftpblockno = 0;
392 	segsize = Defsegsize;
393 	sendack = 0;
394 	for(i = 0; i < 10; i++){
395 		a->port = oport;
396 		if (sendack)
397 			ack(oe, a, tftpblockno);
398 		else
399 			udpsend(oe, a, buf, len);	/* tftp read name */
400 
401 		if((rlen = udprecv(oe, a, tftp, sizeof(Tftp))) < Tftphdrsz)
402 			continue;		/* runt or time-out */
403 
404 		switch((tftp->header[0]<<8)|tftp->header[1]){
405 
406 		case Tftp_ERROR:
407 			print("tftpread1st: error (%d): %s\n",
408 				(tftp->header[2]<<8)|tftp->header[3], (char*)tftp->data);
409 			return -1;
410 
411 		case Tftp_OACK:
412 			n = optval("blksize", (char *)tftp->header+2, rlen-2);
413 			if (n <= 0) {
414 				nak(oe, a, 0, "bad blksize option value", 0);
415 				return -1;
416 			}
417 			segsize = n;
418 			/* no bytes stashed in tftp.data */
419 			i = 0;
420 			sendack = 1;
421 			break;
422 
423 		case Tftp_DATA:
424 			tftpblockno = 1;
425 			len = (tftp->header[2]<<8)|tftp->header[3];
426 			if(len != tftpblockno){
427 				print("tftpread1st: block error: %d\n", len);
428 				nak(oe, a, 1, "block error", 0);
429 				return -1;
430 			}
431 			rlen -= Tftphdrsz;
432 			if(rlen < segsize)
433 				/* ACK now, in case we don't later */
434 				ack(oe, a, tftpblockno);
435 			return rlen;
436 
437 		default:
438 			print("tftpread1st: unexpected pkt type recv'd\n");
439 			nak(oe, a, 0, "unexpected pkt type recv'd", 0);
440 			return -1;
441 		}
442 	}
443 
444 	print("tftpread1st: failed to connect to server (%I!%d)\n", a->ip, oport);
445 	return -1;
446 }
447 
448 static int
449 tftpread(Openeth *oe, Pxenetaddr *a, Tftp *tftp, int dlen)
450 {
451 	int try, blockno, len;
452 
453 	dlen += Tftphdrsz;
454 
455 	/*
456 	 * keep sending ACKs until we get an answer.
457 	 */
458 	for(try = 0; try < 10; try++) {
459 		ack(oe, a, tftpblockno);
460 
461 		len = udprecv(oe, a, tftp, dlen);
462 		/*
463 		 * NB: not `<='; just a header is legal and happens when
464 		 * file being read is a multiple of segsize bytes long.
465 		 */
466 		if(len < Tftphdrsz){
467 			if(Debug)
468 				print("tftpread: too short %d <= %d\n",
469 					len, Tftphdrsz);
470 			continue;
471 		}
472 		switch((tftp->header[0]<<8)|tftp->header[1]){
473 		case Tftp_ERROR:
474 			print("tftpread: error (blk %d): %s\n",
475 				(tftp->header[2]<<8)|tftp->header[3],
476 				(char*)tftp->data);
477 			nak(oe, a, 0, "error pkt recv'd", 0);
478 			return -1;
479 		case Tftp_OACK:
480 			print("tftpread: oack pkt recv'd too late\n");
481 			nak(oe, a, 0, "oack pkt recv'd too late", 0);
482 			return -1;
483 		default:
484 			print("tftpread: unexpected pkt type recv'd\n");
485 			nak(oe, a, 0, "unexpected pkt type recv'd", 0);
486 			return -1;
487 		case Tftp_DATA:
488 			break;
489 		}
490 		blockno = (tftp->header[2]<<8)|tftp->header[3];
491 		if(blockno <= tftpblockno){
492 			if(Debug)
493 				print("tftpread: blkno %d <= %d\n",
494 					blockno, tftpblockno);
495 			continue;
496 		}
497 
498 		if(blockno == tftpblockno+1) {
499 			tftpblockno++;
500 			if(len < dlen)	/* last packet? send final ack */
501 				ack(oe, a, tftpblockno);
502 			return len-Tftphdrsz;
503 		}
504 		print("tftpread: block error: %d, expected %d\n",
505 			blockno, tftpblockno+1);
506 	}
507 
508 	return -1;
509 }
510 
511 /*
512  * broadcast a bootp request for file.  stash any answer in rep.
513  */
514 static int
515 bootpbcast(Openeth *oe, char *file, Bootp *rep)
516 {
517 	Bootp req;
518 	int i;
519 	uchar *ea;
520 	char name[128], *filename, *sysname;
521 	static char zeroes[IPaddrlen];
522 
523 	oe->filename[0] = '\0';
524 	if (Debug)
525 		if (file == nil)
526 			print("bootpopen: %s...", oe->ethname);
527 		else
528 			print("bootpopen: %s!%s...", oe->ethname, file);
529 	if((ea = etheraddr(oe)) == nil){
530 		print("bad ether %s\n", oe->ethname);
531 		return -1;
532 	}
533 
534 	filename = nil;
535 	sysname = 0;
536 	if(file && *file){
537 		strncpy(name, file, sizeof name);
538 		if(filename = strchr(name, '!')){
539 			sysname = name;
540 			*filename++ = 0;
541 		}
542 		else
543 			filename = name;
544 	}
545 
546 	/*
547 	 * form a bootp request packet
548 	 */
549 	memset(&req, 0, sizeof(req));
550 	req.op = Bootrequest;
551 	req.htype = 1;			/* ethernet */
552 	req.hlen = Eaddrlen;		/* ethernet */
553 	memmove(req.chaddr, ea, Eaddrlen);
554 	req.flags[0] = 0x80;		/* request broadcast reply */
555 	if(filename != nil) {
556 		strncpy(req.file, filename, sizeof(req.file));
557 		strncpy(oe->filename, filename, sizeof oe->filename);
558 	}
559 	if(sysname != nil)		/* if server name given, supply it */
560 		strncpy(req.sname, sysname, sizeof(req.sname));
561 
562 	if (memcmp(myaddr.ip, zeroes, sizeof myaddr.ip) == 0)
563 		ipmove(myaddr.ip, IPv4bcast);	/* didn't know my ip yet */
564 	myaddr.port = BPportsrc;
565 	memmove(myea, ea, Eaddrlen);
566 
567 	/* send to 255.255.255.255!67 */
568 	ipmove(bootpserv.ip, IPv4bcast);
569 	bootpserv.port = BPportdst;
570 
571 	/*
572 	 * send it until we get a matching answer
573 	 */
574 	memset(rep, 0, sizeof *rep);
575 	for(i = 10; i > 0; i--) {
576 		req.xid[0] = i;			/* try different xids */
577 		udpsend(oe, &bootpserv, &req, sizeof(req));
578 
579 		if(udprecv(oe, &bootpserv, rep, sizeof(*rep)) <= 0)
580 			continue;
581 		if(memcmp(req.chaddr, rep->chaddr, Eaddrlen) != 0)
582 			continue;
583 		if(rep->htype != 1 || rep->hlen != Eaddrlen)
584 			continue;
585 		if(sysname == 0 || strcmp(sysname, rep->sname) == 0)
586 			break;
587 	}
588 	if(i <= 0) {
589 		if (file == nil)
590 			print("bootp on %s timed out\n", oe->ethname);
591 		else
592 			print("bootp on %s for %s timed out\n", oe->ethname, file);
593 		return -1;
594 	}
595 	return 0;
596 }
597 
598 /*
599  * request file via tftp from server named in rep.
600  * initial data packet will be stashed in tftpb.
601  */
602 static int
603 tftpopen(Openeth *oe, char *file, Bootp *rep)
604 {
605 	char *filename;
606 	char buf[128];
607 	static uchar ipv4noaddr[IPv4addrlen];
608 
609 	/*
610 	 * read file from tftp server in bootp answer
611 	 */
612 	filename = oe->filename;
613 	if (file)
614 		filename = file;
615 	if(filename == 0 || *filename == 0){
616 		if(strcmp(rep->file, "/386/9boot") == 0 ||
617 		   strcmp(rep->file, "/386/9pxeload") == 0) {
618 			print("won't load another boot loader (%s)\n", rep->file);
619 			return -1;		/* avoid infinite loop */
620 		}
621 		filename = rep->file;
622 	}
623 
624 	print("\n");
625 	if(rep->sname[0] != '\0')
626 		print("%s ", rep->sname);
627 
628 	v4tov6(myaddr.ip, rep->yiaddr);
629 	myaddr.port = tftpport;
630 	if (equivip4(rep->siaddr, ipv4noaddr)) { /* no server address? */
631 		getstr("tftp server IP address", buf, sizeof buf, TFTPDEF, 0);
632 		v4parseip(rep->siaddr, buf);
633 	}
634 	v4tov6(tftpserv.ip, rep->siaddr);
635 	tftpserv.port = TFTPport;
636 	if (tftpb == nil)
637 		tftpb = malloc(sizeof *tftpb);
638 
639 	print("(%V!%d): %s ", rep->siaddr, tftpserv.port, filename);
640 
641 	return tftpread1st(oe, &tftpserv, filename, tftpb);
642 }
643 
644 int
645 tftpboot(Openeth *oe, char *file, Bootp *rep, Boot *b)
646 {
647 	int n;
648 
649 	if((n = tftpopen(oe, file, rep)) < 0)
650 		return -1;
651 
652 	progress = 0;			/* no more dots; we're on a roll now */
653 	print(" ");			/* after "sys (ip!port): kernel ..." */
654 	while(bootpass(b, tftpb->data, n) == MORE){
655 		n = tftpread(oe, &tftpserv, tftpb, segsize);
656 		if(n < segsize)
657 			break;
658 	}
659 	if(0 < n && n < segsize)	/* got to end of file */
660 		bootpass(b, tftpb->data, n);
661 	else
662 		nak(oe, &tftpserv, 3, "ok", 0);	/* tftpclose to abort transfer */
663 	bootpass(b, nil, 0);	/* boot if possible */
664 	return -1;
665 }
666 
667 /* leave the channel to /net/ipifc/clone open */
668 static int
669 binddevip(Openeth *oe)
670 {
671 	Chan *icc;
672 	char buf[32];
673 
674 	if (waserror()) {
675 		print("binddevip: can't bind ether %s: %s\n",
676 			oe->netethname, up->errstr);
677 		nexterror();
678 	}
679 	/* get a new ip interface */
680 	oe->ifcctl = icc = namecopen("/net/ipifc/clone", ORDWR);
681 	if(icc == nil)
682 		error("can't open /net/ipifc/clone");
683 
684 	/*
685 	 * specify medium as ethernet, bind the interface to it.
686 	 * this should trigger chandial of types 0x800, 0x806 and 0x86dd.
687 	 */
688 	snprint(buf, sizeof buf, "bind ether %s", oe->netethname);
689 	devtab[icc->type]->write(icc, buf, strlen(buf), 0);  /* bind ether %s */
690 	poperror();
691 	return 0;
692 }
693 
694 /* set the default route */
695 static int
696 adddefroute(char *, uchar *gaddr)
697 {
698 	char buf[64];
699 	Chan *rc;
700 
701 	rc = nil;
702 	if (waserror()) {
703 		if (rc)
704 			cclose(rc);
705 		return -1;
706 	}
707 	rc = enamecopen("/net/iproute", ORDWR);
708 
709 	if(isv4(gaddr))
710 		snprint(buf, sizeof buf, "add 0 0 %I", gaddr);
711 	else
712 		snprint(buf, sizeof buf, "add :: /0 %I", gaddr);
713 	devtab[rc->type]->write(rc, buf, strlen(buf), 0);
714 	poperror();
715 	cclose(rc);
716 	return 0;
717 }
718 
719 static int
720 validip(uchar *ip)
721 {
722 	return ipcmp(ip, IPnoaddr) != 0 && ipcmp(ip, v4prefix) != 0;
723 }
724 
725 static int
726 openetherdev(Openeth *oe)
727 {
728 	int n;
729 	char num[16];
730 	Chan *c;
731 	static char promisc[] = "promiscuous";
732 
733 	if (chdir(oe->netethname) < 0)
734 		return -1;			/* out of ethers */
735 
736 	oe->ethctl = nil;
737 	if (waserror()) {
738 		print("error opening /net/ether%d/0/ctl: %s\n",
739 			oe->ctlrno, up->errstr);
740 		if (oe->ethctl) {
741 			cclose(oe->ethctl);
742 			oe->ethctl = nil;
743 		}
744 		chdir("/");			/* don't hold conv. open */
745 		return -1;
746 	}
747 	oe->ethctl = c = namecopen("0/ctl", ORDWR);	/* should be ipv4 */
748 	if (c == nil) {
749 		/* read clone file to make conversation 0 since not present */
750 		oe->ethctl = c = enamecopen("clone", ORDWR);
751 		n = devtab[c->type]->read(c, num, sizeof num - 1, 0);
752 		if (n < 0)
753 			print("no %s/clone: %s\n", oe->netethname, up->errstr);
754 		else {
755 			num[n] = 0;
756 			print("%s/clone returned %s\n", oe->netethname, num);
757 		}
758 	}
759 	/* shouldn't be needed to read bootp (broadcast) reply */
760 	devtab[c->type]->write(c, promisc, sizeof promisc-1, 0);
761 	poperror();
762 	chdir("/");
763 	/* leave oe->ethctl open to keep promiscuous mode on */
764 	return 0;
765 }
766 
767 /* add a logical interface to the ip stack */
768 int
769 minip4cfg(Openeth *oe)
770 {
771 	int n;
772 	char buf[64];
773 
774 	n = snprint(buf, sizeof buf, "add %I", IPnoaddr);
775 	devtab[oe->ifcctl->type]->write(oe->ifcctl, buf, n, 0);	/* add %I */
776 
777 	openetherdev(oe);
778 	return 0;
779 }
780 
781 /* remove the :: address added by minip4cfg */
782 int
783 unminip4cfg(Openeth *oe)
784 {
785 	int n;
786 	char buf[64];
787 
788 	n = snprint(buf, sizeof buf, "remove %I /128", IPnoaddr);
789 	if (waserror()) {
790 		print("failed write to ifc: %s: %s\n", buf, up->errstr);
791 		return -1;
792 	}
793 	devtab[oe->ifcctl->type]->write(oe->ifcctl, buf, n, 0);	/* remove %I */
794 	cclose(oe->ethctl);		/* turn promiscuous mode off */
795 	oe->ethctl = nil;
796 	poperror();
797 	return 0;
798 }
799 
800 /*
801  * parse p, looking for option `op'.  if non-nil, np points to minimum length.
802  * return nil if option is too small, else ptr to opt, and
803  * store actual length via np if non-nil.
804  */
805 uchar*
806 optget(uchar *p, int op, int *np)
807 {
808 	int len, code;
809 
810 	while ((code = *p++) != OBend) {
811 		if(code == OBpad)
812 			continue;
813 		len = *p++;
814 		if(code != op) {
815 			p += len;
816 			continue;
817 		}
818 		if(np != nil){
819 			if(*np > len) {
820 				return 0;
821 			}
822 			*np = len;
823 		}
824 		return p;
825 	}
826 	return 0;
827 }
828 
829 int
830 optgetaddr(uchar *p, int op, uchar *ip)
831 {
832 	int len;
833 
834 	len = 4;
835 	p = optget(p, op, &len);
836 	if(p == nil)
837 		return 0;
838 	v4tov6(ip, p);
839 	return 1;
840 }
841 
842 int beprimary = 1;
843 
844 /* add a logical interface to the ip stack */
845 int
846 ip4cfg(Openeth *oe, Bootp *rep)
847 {
848 	int n;
849 	uchar gaddr[IPaddrlen], v6mask[IPaddrlen];
850 	uchar v4mask[IPv4addrlen];
851 	char buf[64];
852 	static uchar zeroes[4];
853 
854 	v4tov6(gaddr, rep->yiaddr);
855 	if(!validip(gaddr))
856 		return -1;
857 
858 	/* dig subnet mask, if any, out of options.  if none, guess. */
859 	if(optgetaddr(rep->optdata, OBmask, v6mask)) {
860 		v6tov4(v4mask, v6mask);
861 		n = snprint(buf, sizeof buf, "add %V %M", rep->yiaddr, v4mask);
862 	} else
863 		n = snprint(buf, sizeof buf, "add %V 255.255.255.0", rep->yiaddr);
864 
865 	devtab[oe->ifcctl->type]->write(oe->ifcctl, buf, n, 0);
866 
867 	v4tov6(gaddr, rep->giaddr);
868 	if(beprimary==1 && validip(gaddr) && !equivip4(rep->giaddr, zeroes))
869 		adddefroute("/net", gaddr);
870 	return 0;
871 }
872 
873 static int
874 openudp(Openeth *oe)
875 {
876 	int n;
877 	char buf[16];
878 	Chan *cc;
879 
880 	/* read clone file for conversation number */
881 	if (waserror())
882 		panic("openudp: can't open /net/udp/clone");
883 	cc = enamecopen("/net/udp/clone", ORDWR);
884 	oe->udpctl = cc;
885 	n = devtab[cc->type]->read(cc, buf, sizeof buf - 1, 0);
886 	poperror();
887 	buf[n] = '\0';
888 	return atoi(buf);
889 }
890 
891 static void
892 initbind(Openeth *oe)
893 {
894 	char buf[8];
895 
896 	if (waserror()) {
897 		print("error while binding: %s\n", up->errstr);
898 		return;
899 	}
900 	snprint(buf, sizeof buf, "#I%d", oe->ctlrno);
901 	bind(buf, "/net", MAFTER);
902 	snprint(buf, sizeof buf, "#l%d", oe->ctlrno);
903 	bind(buf, "/net", MAFTER);
904 	binddevip(oe);
905 	poperror();
906 }
907 
908 static void
909 closeudp(Openeth *oe)
910 {
911 	if (oe->udpctl) {
912 		cclose(oe->udpctl);
913 		oe->udpctl = nil;
914 	}
915 	if (oe->udpdata) {
916 		cclose(oe->udpdata);
917 		oe->udpdata = nil;
918 	}
919 }
920 
921 static int
922 announce(Openeth *oe, char *port)
923 {
924 	int udpconv;
925 	char buf[32];
926 	static char hdrs[] = "headers";
927 
928 	while (waserror()) {
929 		print("can't announce udp!*!%s: %s\n", port, up->errstr);
930 		closeudp(oe);
931 		nexterror();
932 	}
933 	udpconv = openudp(oe);
934 	if (udpconv < 0)
935 		panic("can't open udp conversation: %s", up->errstr);
936 
937 	/* headers is only effective after a udp announce */
938 	snprint(buf, sizeof buf, "announce %s", port);
939 	devtab[oe->udpctl->type]->write(oe->udpctl, buf, strlen(buf), 0);
940 	devtab[oe->udpctl->type]->write(oe->udpctl, hdrs, sizeof hdrs - 1, 0);
941 	poperror();
942 
943 	/* now okay to open the data file */
944 	snprint(buf, sizeof buf, "/net/udp/%d/data", udpconv);
945 	/*
946 	 * we must use create, not open, to get Conv->rq and ->wq
947 	 * allocated by udpcreate.
948 	 */
949 	oe->udpdata = enameccreate(buf, ORDWR);
950 	cclose(oe->udpctl);
951 	oe->udpctl = nil;
952 	return udpconv;
953 }
954 
955 static long
956 tftprdfile(Openeth *oe, int openread, void* va, long len)
957 {
958 	int n;
959 	char *p, *v;
960 
961 	n = openread;	/* have read this many bytes already into tftpb->data */
962 	p = v = va;
963 	len--;				/* leave room for NUL */
964 	while(n > 0) {
965 		if((p-v)+n > len)
966 			n = len - (p-v);
967 		memmove(p, tftpb->data, n);
968 		p += n;
969 		*p = 0;
970 		if(n != segsize)
971 			break;
972 
973 		if((n = tftpread(oe, &tftpserv, tftpb, segsize)) < 0)
974 			return -1;
975 	}
976 	return p-v;
977 }
978 
979 static int
980 newtftpconn(Openeth *oe, Bootp *rep)
981 {
982 	char num[16], dialstr[64];
983 
984 	if (waserror()) {
985 		print("can't dial: %s\n", up->errstr);
986 		return -1;
987 	}
988 	closeudp(oe);
989 
990 	tftpphase = 1;
991 	tftpport = 5000 + nrand(20480);
992 	snprint(num, sizeof num, "%d", tftpport);
993 	if (Tftpusehdrs)
994 		announce(oe, num);
995 	else {
996 		snprint(dialstr, sizeof dialstr, "/net/udp!%V!%d",
997 			rep->siaddr, TFTPport);
998 		oe->udpdata = chandial(dialstr, num, nil, nil);
999 		oe->udpctl = nil;
1000 	}
1001 	poperror();
1002 	return 0;
1003 }
1004 
1005 static int
1006 setipcfg(Openeth *oe, Bootp *rep)
1007 {
1008 	int r;
1009 
1010 	tftpphase = 0;
1011 	progress = 1;
1012 
1013 	/* /net/iproute is unpopulated here; add at least broadcast */
1014 	minip4cfg(oe);
1015 	announce(oe, "68");
1016 	r = bootpbcast(oe, nil, rep);
1017 	closeudp(oe);
1018 	unminip4cfg(oe);
1019 	if(r < 0)
1020 		return -1;
1021 
1022 	ip4cfg(oe, rep);
1023 	if (Debug)
1024 		print("got & set ip config\n");
1025 	return 0;
1026 }
1027 
1028 static int
1029 getkernname(Openeth *oe, Bootp *rep, Kernname *kp)
1030 {
1031 	int n;
1032 	char *ini, *p;
1033 	char cfgpxe[32], buf[64];
1034 
1035 	if (kp->bootfile) {
1036 		print("getkernname: already have bootfile %s\n", kp->bootfile);
1037 		return 0;
1038 	}
1039 	if (newtftpconn(oe, rep) < 0)
1040 		return -1;
1041 
1042 	/* use our mac address instead of relying on a bootp answer */
1043 	snprint(cfgpxe, sizeof cfgpxe, "/cfg/pxe/%E", myea);
1044 	/*
1045 	 * use bootp answer (rep) to open cfgpxe.
1046 	 * reads first pkt of cfgpxe into tftpb->data.
1047 	 */
1048 	n = tftpopen(oe, cfgpxe, rep);
1049 	if (n < 0) {
1050 		print("\nfailed.\n");
1051 		return -1;
1052 	}
1053 	if (Debug)
1054 		print("\opened %s\n", cfgpxe);
1055 
1056 	ini = smalloc(2*BOOTARGSLEN);
1057 	/* starts by copying data from tftpb->data into ini */
1058 	n = tftprdfile(oe, n, ini, 2*BOOTARGSLEN);
1059 	if (n < 0) {
1060 		print("error reading %s\n", cfgpxe);
1061 		free(ini);
1062 		return -1;
1063 	}
1064 	print(" read %d bytes", n);
1065 
1066 	/*
1067 	 * take note of plan9.ini contents.  consumes ini to make config vars,
1068 	 * thus we can't free ini.
1069 	 */
1070 	dotini(ini);
1071 	i8250console();		/* configure serial port with defaults */
1072 
1073 	kp->edev = kp->bootfile = nil;
1074 	p = getconf("bootfile");
1075 	if (p)
1076 		kstrdup(&kp->bootfile, p);
1077 	if (kp->bootfile == nil)
1078 		askbootfile(buf, sizeof buf, &kp->bootfile, Promptsecs,
1079 			"ether0!/386/9pccpu");
1080 	if (strcmp(kp->bootfile, "manual") == 0)
1081 		askbootfile(buf, sizeof buf, &kp->bootfile, 0, "");
1082 
1083 	p = strchr(kp->bootfile, '!');
1084 	if (p != nil) {
1085 		*p++ = '\0';
1086 		kp->edev = kp->bootfile;
1087 		kp->bootfile = nil;
1088 		kstrdup(&kp->bootfile, p);
1089 		if (strncmp(kp->edev, ethernm, sizeof ethernm - 1) != 0) {
1090 			print("bad ether device %s\n", kp->edev);
1091 			return -1;
1092 		}
1093 	}
1094 
1095 	/* pass arguments to kernels that can use them */
1096 	strecpy(BOOTLINE, BOOTLINE+BOOTLINELEN, kp->bootfile);
1097 	p = strchr(kp->bootfile, ' ');
1098 	if(p != nil)
1099 		*p = '\0';
1100 	return 0;
1101 }
1102 
1103 static void
1104 unbinddevip(Openeth *oe)
1105 {
1106 	Chan *icc;
1107 	static char unbind[] = "unbind";
1108 
1109 	icc = oe->ifcctl;
1110 	if (icc) {
1111 		devtab[icc->type]->write(icc, unbind, sizeof unbind - 1, 0);
1112 		cclose(icc);
1113 		oe->ifcctl = nil;
1114 	}
1115 }
1116 
1117 /*
1118  * phase 1: get our ip (v4) configuration via bootp, set new ip configuration.
1119  * phase 2: load /cfg/pxe, parse it, extract kernel filename.
1120  * phase 3: load kernel and jump to it.
1121  */
1122 static void
1123 tftpload(Openeth *oe, Kernname *kp)
1124 {
1125 	Bootp rep;
1126 	Boot boot;
1127 
1128 	if(waserror()) {
1129 		print("tftpload: %s\n", up->errstr);
1130 		closeudp(oe);
1131 		unbinddevip(oe);
1132 		return;
1133 	}
1134 
1135 	memset(&rep, 0, sizeof rep);
1136 	if (setipcfg(oe, &rep) >= 0 &&
1137 	    getkernname(oe, &rep, kp) >= 0 &&
1138 	    (!kp->edev ||
1139 	     oe->ctlrno == strtol(kp->edev + sizeof ethernm - 1, 0, 10)) &&
1140 	    newtftpconn(oe, &rep) >= 0) {
1141 		memset(&boot, 0, sizeof boot);
1142 		boot.state = INITKERNEL;
1143 		tftpboot(oe, kp->bootfile, &rep, &boot);
1144 	}
1145 
1146 	/* we failed or bootfile asked for another ether */
1147 	poperror();
1148 	closeudp(oe);
1149 	unbinddevip(oe);
1150 }
1151 
1152 static int
1153 etherload(int eth, Kernname *kp)
1154 {
1155 	Openeth *oe;
1156 
1157 	print("pxe on ether%d ", eth);
1158 	oe = smalloc(sizeof *oe);
1159 	memset(oe, 0, sizeof *oe);
1160 	oe->ctlrno = eth;
1161 	snprint(oe->ethname, sizeof oe->ethname, "ether%d", oe->ctlrno);
1162 	snprint(oe->netethname, sizeof oe->netethname, "/net/ether%d",
1163 		oe->ctlrno);
1164 	initbind(oe);
1165 
1166 	tftpload(oe, kp);
1167 
1168 	/* failed to boot; keep going */
1169 	unmount(nil, "/net");
1170 	return 0;
1171 }
1172 
1173 static int
1174 attacheth(int neth)
1175 {
1176 	char num[4];
1177 	Chan *cc;
1178 
1179 	cc = nil;
1180 	if (waserror()) {		/* no more interfaces */
1181 		if (cc)
1182 			cclose(cc);
1183 		return -1;
1184 	}
1185 	snprint(num, sizeof num, "%d", neth);
1186 	cc = etherattach(num);
1187 	if (cc)
1188 		cclose(cc);
1189 	poperror();
1190 	return cc == nil? -1: 0;
1191 }
1192 
1193 void
1194 bootloadproc(void *)
1195 {
1196 	int eth, neth, needattach;
1197 	Kernname kernnm;
1198 
1199 	srand(TK2MS(m->ticks));			/* for local port numbers */
1200 	nrand(20480);				/* 1st # is always 0; toss it */
1201 	kernnm.edev = kernnm.bootfile = nil;
1202 
1203 	while(waserror()) {
1204 		print("%s\n", up->errstr);
1205 		tsleep(&up->sleep, return0, 0, 30*1000);
1206 	}
1207 	neth = MaxEther;
1208 	needattach = 1;
1209 	for (;;) {
1210 		/* try each interface in turn: first get /cfg/pxe file */
1211 		for (eth = 0; eth < neth && kernnm.edev == nil; eth++) {
1212 			if (needattach && attacheth(eth) < 0)
1213 				break;
1214 			etherload(eth, &kernnm);
1215 		}
1216 		if (needattach) {
1217 			neth = eth;
1218 			needattach = 0;
1219 			if (neth == 0)
1220 				print("no ethernet interfaces found\n");
1221 		}
1222 		if (kernnm.edev != nil) {
1223 			eth = strtol(kernnm.edev + sizeof ethernm - 1, 0, 10);
1224 			etherload(eth, &kernnm);
1225 		}
1226 		/*
1227 		 * couldn't boot on any ether.  don't give up;
1228 		 * perhaps the boot servers are down, so try again later.
1229 		 */
1230 		print("failed to boot via pxe; will try again.\n");
1231 		tsleep(&up->sleep, return0, 0, 15*1000);
1232 	}
1233 }
1234