/* xref: /plan9/sys/src/9/ip/ip.c (revision 6b7c5dce8459a5aafcfa1a501da6f475d0d3f97c) */
#include	"u.h"
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"../port/error.h"

#include	"ip.h"

10 #define BLKIPVER(xp)	(((Ip4hdr*)((xp)->rp))->vihl&0xF0)
11 
12 static char *statnames[] =
13 {
14 [Forwarding]	"Forwarding",
15 [DefaultTTL]	"DefaultTTL",
16 [InReceives]	"InReceives",
17 [InHdrErrors]	"InHdrErrors",
18 [InAddrErrors]	"InAddrErrors",
19 [ForwDatagrams]	"ForwDatagrams",
20 [InUnknownProtos]	"InUnknownProtos",
21 [InDiscards]	"InDiscards",
22 [InDelivers]	"InDelivers",
23 [OutRequests]	"OutRequests",
24 [OutDiscards]	"OutDiscards",
25 [OutNoRoutes]	"OutNoRoutes",
26 [ReasmTimeout]	"ReasmTimeout",
27 [ReasmReqds]	"ReasmReqds",
28 [ReasmOKs]	"ReasmOKs",
29 [ReasmFails]	"ReasmFails",
30 [FragOKs]	"FragOKs",
31 [FragFails]	"FragFails",
32 [FragCreates]	"FragCreates",
33 };
34 
35 #define BLKIP(xp)	((Ip4hdr*)((xp)->rp))
36 /*
37  * This sleazy macro relies on the media header size being
38  * larger than sizeof(Ipfrag). ipreassemble checks this is true
39  */
40 #define BKFG(xp)	((Ipfrag*)((xp)->base))
41 
42 ushort		ipcsum(uchar*);
43 Block*		ip4reassemble(IP*, int, Block*, Ip4hdr*);
44 void		ipfragfree4(IP*, Fragment4*);
45 Fragment4*	ipfragallo4(IP*);
46 
47 void
ip_init_6(Fs * f)48 ip_init_6(Fs *f)
49 {
50 	v6params *v6p;
51 
52 	v6p = smalloc(sizeof(v6params));
53 
54 	v6p->rp.mflag		= 0;		/* default not managed */
55 	v6p->rp.oflag		= 0;
56 	v6p->rp.maxraint	= 600000;	/* millisecs */
57 	v6p->rp.minraint	= 200000;
58 	v6p->rp.linkmtu		= 0;		/* no mtu sent */
59 	v6p->rp.reachtime	= 0;
60 	v6p->rp.rxmitra		= 0;
61 	v6p->rp.ttl		= MAXTTL;
62 	v6p->rp.routerlt	= 3 * v6p->rp.maxraint;
63 
64 	v6p->hp.rxmithost	= 1000;		/* v6 RETRANS_TIMER */
65 
66 	v6p->cdrouter 		= -1;
67 
68 	f->v6p			= v6p;
69 }
70 
71 void
initfrag(IP * ip,int size)72 initfrag(IP *ip, int size)
73 {
74 	Fragment4 *fq4, *eq4;
75 	Fragment6 *fq6, *eq6;
76 
77 	ip->fragfree4 = (Fragment4*)malloc(sizeof(Fragment4) * size);
78 	if(ip->fragfree4 == nil)
79 		panic("initfrag");
80 
81 	eq4 = &ip->fragfree4[size];
82 	for(fq4 = ip->fragfree4; fq4 < eq4; fq4++)
83 		fq4->next = fq4+1;
84 
85 	ip->fragfree4[size-1].next = nil;
86 
87 	ip->fragfree6 = (Fragment6*)malloc(sizeof(Fragment6) * size);
88 	if(ip->fragfree6 == nil)
89 		panic("initfrag");
90 
91 	eq6 = &ip->fragfree6[size];
92 	for(fq6 = ip->fragfree6; fq6 < eq6; fq6++)
93 		fq6->next = fq6+1;
94 
95 	ip->fragfree6[size-1].next = nil;
96 }
97 
98 void
ip_init(Fs * f)99 ip_init(Fs *f)
100 {
101 	IP *ip;
102 
103 	ip = smalloc(sizeof(IP));
104 	initfrag(ip, 100);
105 	f->ip = ip;
106 
107 	ip_init_6(f);
108 }
109 
110 void
iprouting(Fs * f,int on)111 iprouting(Fs *f, int on)
112 {
113 	f->ip->iprouting = on;
114 	if(f->ip->iprouting==0)
115 		f->ip->stats[Forwarding] = 2;
116 	else
117 		f->ip->stats[Forwarding] = 1;
118 }
119 
120 int
ipoput4(Fs * f,Block * bp,int gating,int ttl,int tos,Conv * c)121 ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
122 {
123 	Ipifc *ifc;
124 	uchar *gate;
125 	ulong fragoff;
126 	Block *xp, *nb;
127 	Ip4hdr *eh, *feh;
128 	int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
129 	Route *r, *sr;
130 	IP *ip;
131 	int rv = 0;
132 
133 	ip = f->ip;
134 
135 	/* Fill out the ip header */
136 	eh = (Ip4hdr*)(bp->rp);
137 
138 	ip->stats[OutRequests]++;
139 
140 	/* Number of uchars in data and ip header to write */
141 	len = blocklen(bp);
142 
143 	if(gating){
144 		chunk = nhgets(eh->length);
145 		if(chunk > len){
146 			ip->stats[OutDiscards]++;
147 			netlog(f, Logip, "short gated packet\n");
148 			goto free;
149 		}
150 		if(chunk < len)
151 			len = chunk;
152 	}
153 	if(len >= IP_MAX){
154 		ip->stats[OutDiscards]++;
155 		netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
156 		goto free;
157 	}
158 
159 	r = v4lookup(f, eh->dst, c);
160 	if(r == nil){
161 		ip->stats[OutNoRoutes]++;
162 		netlog(f, Logip, "no interface %V\n", eh->dst);
163 		rv = -1;
164 		goto free;
165 	}
166 
167 	ifc = r->ifc;
168 	if(r->type & (Rifc|Runi))
169 		gate = eh->dst;
170 	else
171 	if(r->type & (Rbcast|Rmulti)) {
172 		gate = eh->dst;
173 		sr = v4lookup(f, eh->src, nil);
174 		if(sr != nil && (sr->type & Runi))
175 			ifc = sr->ifc;
176 	}
177 	else
178 		gate = r->v4.gate;
179 
180 	if(!gating)
181 		eh->vihl = IP_VER4|IP_HLEN4;
182 	eh->ttl = ttl;
183 	if(!gating)
184 		eh->tos = tos;
185 
186 	if(!canrlock(ifc))
187 		goto free;
188 	if(waserror()){
189 		runlock(ifc);
190 		nexterror();
191 	}
192 	if(ifc->m == nil)
193 		goto raise;
194 
195 	/* If we dont need to fragment just send it */
196 	if(c && c->maxfragsize && c->maxfragsize < ifc->maxtu)
197 		medialen = c->maxfragsize - ifc->m->hsize;
198 	else
199 		medialen = ifc->maxtu - ifc->m->hsize;
200 	if(len <= medialen) {
201 		if(!gating)
202 			hnputs(eh->id, incref(&ip->id4));
203 		hnputs(eh->length, len);
204 		if(!gating){
205 			eh->frag[0] = 0;
206 			eh->frag[1] = 0;
207 		}
208 		eh->cksum[0] = 0;
209 		eh->cksum[1] = 0;
210 		hnputs(eh->cksum, ipcsum(&eh->vihl));
211 		assert(bp->next == nil);
212 		ifc->m->bwrite(ifc, bp, V4, gate);
213 		runlock(ifc);
214 		poperror();
215 		return 0;
216 	}
217 
218 	if((eh->frag[0] & (IP_DF>>8)) && !gating)
219 		print("%V: DF set\n", eh->dst);
220 
221 	if(eh->frag[0] & (IP_DF>>8)){
222 		ip->stats[FragFails]++;
223 		ip->stats[OutDiscards]++;
224 		icmpcantfrag(f, bp, medialen);
225 		netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
226 		goto raise;
227 	}
228 
229 	seglen = (medialen - IP4HDR) & ~7;
230 	if(seglen < 8){
231 		ip->stats[FragFails]++;
232 		ip->stats[OutDiscards]++;
233 		netlog(f, Logip, "%V seglen < 8\n", eh->dst);
234 		goto raise;
235 	}
236 
237 	dlen = len - IP4HDR;
238 	xp = bp;
239 	if(gating)
240 		lid = nhgets(eh->id);
241 	else
242 		lid = incref(&ip->id4);
243 
244 	offset = IP4HDR;
245 	while(xp != nil && offset && offset >= BLEN(xp)) {
246 		offset -= BLEN(xp);
247 		xp = xp->next;
248 	}
249 	xp->rp += offset;
250 
251 	if(gating)
252 		fragoff = nhgets(eh->frag)<<3;
253 	else
254 		fragoff = 0;
255 	dlen += fragoff;
256 	for(; fragoff < dlen; fragoff += seglen) {
257 		nb = allocb(IP4HDR+seglen);
258 		feh = (Ip4hdr*)(nb->rp);
259 
260 		memmove(nb->wp, eh, IP4HDR);
261 		nb->wp += IP4HDR;
262 
263 		if((fragoff + seglen) >= dlen) {
264 			seglen = dlen - fragoff;
265 			hnputs(feh->frag, fragoff>>3);
266 		}
267 		else
268 			hnputs(feh->frag, (fragoff>>3)|IP_MF);
269 
270 		hnputs(feh->length, seglen + IP4HDR);
271 		hnputs(feh->id, lid);
272 
273 		/* Copy up the data area */
274 		chunk = seglen;
275 		while(chunk) {
276 			if(!xp) {
277 				ip->stats[OutDiscards]++;
278 				ip->stats[FragFails]++;
279 				freeblist(nb);
280 				netlog(f, Logip, "!xp: chunk %d\n", chunk);
281 				goto raise;
282 			}
283 			blklen = chunk;
284 			if(BLEN(xp) < chunk)
285 				blklen = BLEN(xp);
286 			memmove(nb->wp, xp->rp, blklen);
287 			nb->wp += blklen;
288 			xp->rp += blklen;
289 			chunk -= blklen;
290 			if(xp->rp == xp->wp)
291 				xp = xp->next;
292 		}
293 
294 		feh->cksum[0] = 0;
295 		feh->cksum[1] = 0;
296 		hnputs(feh->cksum, ipcsum(&feh->vihl));
297 		ifc->m->bwrite(ifc, nb, V4, gate);
298 		ip->stats[FragCreates]++;
299 	}
300 	ip->stats[FragOKs]++;
301 raise:
302 	runlock(ifc);
303 	poperror();
304 free:
305 	freeblist(bp);
306 	return rv;
307 }
308 
309 void
ipiput4(Fs * f,Ipifc * ifc,Block * bp)310 ipiput4(Fs *f, Ipifc *ifc, Block *bp)
311 {
312 	int hl;
313 	int hop, tos, proto, olen;
314 	Ip4hdr *h;
315 	Proto *p;
316 	ushort frag;
317 	int notforme;
318 	uchar *dp, v6dst[IPaddrlen];
319 	IP *ip;
320 	Route *r;
321 	Conv conv;
322 
323 	if(BLKIPVER(bp) != IP_VER4) {
324 		ipiput6(f, ifc, bp);
325 		return;
326 	}
327 
328 	ip = f->ip;
329 	ip->stats[InReceives]++;
330 
331 	/*
332 	 *  Ensure we have all the header info in the first
333 	 *  block.  Make life easier for other protocols by
334 	 *  collecting up to the first 64 bytes in the first block.
335 	 */
336 	if(BLEN(bp) < 64) {
337 		hl = blocklen(bp);
338 		if(hl < IP4HDR)
339 			hl = IP4HDR;
340 		if(hl > 64)
341 			hl = 64;
342 		bp = pullupblock(bp, hl);
343 		if(bp == nil)
344 			return;
345 	}
346 
347 	h = (Ip4hdr*)(bp->rp);
348 
349 	/* dump anything that whose header doesn't checksum */
350 	if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
351 		ip->stats[InHdrErrors]++;
352 		netlog(f, Logip, "ip: checksum error %V\n", h->src);
353 		freeblist(bp);
354 		return;
355 	}
356 	v4tov6(v6dst, h->dst);
357 	notforme = ipforme(f, v6dst) == 0;
358 
359 	/* Check header length and version */
360 	if((h->vihl&0x0F) != IP_HLEN4) {
361 		hl = (h->vihl&0xF)<<2;
362 		if(hl < (IP_HLEN4<<2)) {
363 			ip->stats[InHdrErrors]++;
364 			netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
365 			freeblist(bp);
366 			return;
367 		}
368 		/* If this is not routed strip off the options */
369 		if(notforme == 0) {
370 			olen = nhgets(h->length);
371 			dp = bp->rp + (hl - (IP_HLEN4<<2));
372 			memmove(dp, h, IP_HLEN4<<2);
373 			bp->rp = dp;
374 			h = (Ip4hdr*)(bp->rp);
375 			h->vihl = (IP_VER4|IP_HLEN4);
376 			hnputs(h->length, olen-hl+(IP_HLEN4<<2));
377 		}
378 	}
379 
380 	/* route */
381 	if(notforme) {
382 		if(!ip->iprouting){
383 			freeblist(bp);
384 			return;
385 		}
386 
387 		/* don't forward to source's network */
388 		memset(&conv, 0, sizeof conv);
389 		conv.r = nil;
390 		r = v4lookup(f, h->dst, &conv);
391 		if(r == nil || r->ifc == ifc){
392 			ip->stats[OutDiscards]++;
393 			freeblist(bp);
394 			return;
395 		}
396 
397 		/* don't forward if packet has timed out */
398 		hop = h->ttl;
399 		if(hop < 1) {
400 			ip->stats[InHdrErrors]++;
401 			icmpttlexceeded(f, ifc->lifc->local, bp);
402 			freeblist(bp);
403 			return;
404 		}
405 
406 		/* reassemble if the interface expects it */
407 if(r->ifc == nil) panic("nil route rfc");
408 		if(r->ifc->reassemble){
409 			frag = nhgets(h->frag);
410 			if(frag) {
411 				h->tos = 0;
412 				if(frag & IP_MF)
413 					h->tos = 1;
414 				bp = ip4reassemble(ip, frag, bp, h);
415 				if(bp == nil)
416 					return;
417 				h = (Ip4hdr*)(bp->rp);
418 			}
419 		}
420 
421 		ip->stats[ForwDatagrams]++;
422 		tos = h->tos;
423 		hop = h->ttl;
424 		ipoput4(f, bp, 1, hop - 1, tos, &conv);
425 		return;
426 	}
427 
428 	frag = nhgets(h->frag);
429 	if(frag) {
430 		h->tos = 0;
431 		if(frag & IP_MF)
432 			h->tos = 1;
433 		bp = ip4reassemble(ip, frag, bp, h);
434 		if(bp == nil)
435 			return;
436 		h = (Ip4hdr*)(bp->rp);
437 	}
438 
439 	/* don't let any frag info go up the stack */
440 	h->frag[0] = 0;
441 	h->frag[1] = 0;
442 
443 	proto = h->proto;
444 	p = Fsrcvpcol(f, proto);
445 	if(p != nil && p->rcv != nil) {
446 		ip->stats[InDelivers]++;
447 		(*p->rcv)(p, ifc, bp);
448 		return;
449 	}
450 	ip->stats[InDiscards]++;
451 	ip->stats[InUnknownProtos]++;
452 	freeblist(bp);
453 }
454 
455 int
ipstats(Fs * f,char * buf,int len)456 ipstats(Fs *f, char *buf, int len)
457 {
458 	IP *ip;
459 	char *p, *e;
460 	int i;
461 
462 	ip = f->ip;
463 	ip->stats[DefaultTTL] = MAXTTL;
464 
465 	p = buf;
466 	e = p+len;
467 	for(i = 0; i < Nipstats; i++)
468 		p = seprint(p, e, "%s: %llud\n", statnames[i], ip->stats[i]);
469 	return p - buf;
470 }
471 
472 Block*
ip4reassemble(IP * ip,int offset,Block * bp,Ip4hdr * ih)473 ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
474 {
475 	int fend;
476 	ushort id;
477 	Fragment4 *f, *fnext;
478 	ulong src, dst;
479 	Block *bl, **l, *last, *prev;
480 	int ovlap, len, fragsize, pktposn;
481 
482 	src = nhgetl(ih->src);
483 	dst = nhgetl(ih->dst);
484 	id = nhgets(ih->id);
485 
486 	/*
487 	 *  block lists are too hard, pullupblock into a single block
488 	 */
489 	if(bp->next){
490 		bp = pullupblock(bp, blocklen(bp));
491 		ih = (Ip4hdr*)(bp->rp);
492 	}
493 
494 	qlock(&ip->fraglock4);
495 
496 	/*
497 	 *  find a reassembly queue for this fragment
498 	 */
499 	for(f = ip->flisthead4; f; f = fnext){
500 		fnext = f->next;	/* because ipfragfree4 changes the list */
501 		if(f->src == src && f->dst == dst && f->id == id)
502 			break;
503 		if(f->age < NOW){
504 			ip->stats[ReasmTimeout]++;
505 			ipfragfree4(ip, f);
506 		}
507 	}
508 
509 	/*
510 	 *  if this isn't a fragmented packet, accept it
511 	 *  and get rid of any fragments that might go
512 	 *  with it.
513 	 */
514 	if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
515 		if(f != nil) {
516 			ipfragfree4(ip, f);
517 			ip->stats[ReasmFails]++;
518 		}
519 		qunlock(&ip->fraglock4);
520 		return bp;
521 	}
522 
523 	if(bp->base+IPFRAGSZ >= bp->rp){
524 		bp = padblock(bp, IPFRAGSZ);
525 		bp->rp += IPFRAGSZ;
526 	}
527 
528 	BKFG(bp)->foff = offset<<3;
529 	BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
530 
531 	/* First fragment allocates a reassembly queue */
532 	if(f == nil) {
533 		f = ipfragallo4(ip);
534 		f->id = id;
535 		f->src = src;
536 		f->dst = dst;
537 
538 		f->blist = bp;
539 
540 		qunlock(&ip->fraglock4);
541 		ip->stats[ReasmReqds]++;
542 		return nil;
543 	}
544 
545 	/*
546 	 *  find the new fragment's position in the queue
547 	 */
548 	prev = nil;
549 	l = &f->blist;
550 	bl = f->blist;
551 	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
552 		prev = bl;
553 		l = &bl->next;
554 		bl = bl->next;
555 	}
556 
557 	/* Check overlap of a previous fragment - trim away as necessary */
558 	if(prev) {
559 		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
560 		if(ovlap > 0) {
561 			if(ovlap >= BKFG(bp)->flen) {
562 				freeblist(bp);
563 				qunlock(&ip->fraglock4);
564 				return nil;
565 			}
566 			BKFG(prev)->flen -= ovlap;
567 		}
568 	}
569 
570 	/* Link onto assembly queue */
571 	bp->next = *l;
572 	*l = bp;
573 
574 	/* Check to see if succeeding segments overlap */
575 	if(bp->next) {
576 		l = &bp->next;
577 		fend = BKFG(bp)->foff + BKFG(bp)->flen;
578 		/* Take completely covered segments out */
579 		while(*l) {
580 			ovlap = fend - BKFG(*l)->foff;
581 			if(ovlap <= 0)
582 				break;
583 			if(ovlap < BKFG(*l)->flen) {
584 				BKFG(*l)->flen -= ovlap;
585 				BKFG(*l)->foff += ovlap;
586 				/* move up ih hdrs */
587 				memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
588 				(*l)->rp += ovlap;
589 				break;
590 			}
591 			last = (*l)->next;
592 			(*l)->next = nil;
593 			freeblist(*l);
594 			*l = last;
595 		}
596 	}
597 
598 	/*
599 	 *  look for a complete packet.  if we get to a fragment
600 	 *  without IP_MF set, we're done.
601 	 */
602 	pktposn = 0;
603 	for(bl = f->blist; bl; bl = bl->next) {
604 		if(BKFG(bl)->foff != pktposn)
605 			break;
606 		if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
607 			bl = f->blist;
608 			len = nhgets(BLKIP(bl)->length);
609 			bl->wp = bl->rp + len;
610 
611 			/* Pullup all the fragment headers and
612 			 * return a complete packet
613 			 */
614 			for(bl = bl->next; bl; bl = bl->next) {
615 				fragsize = BKFG(bl)->flen;
616 				len += fragsize;
617 				bl->rp += IP4HDR;
618 				bl->wp = bl->rp + fragsize;
619 			}
620 
621 			bl = f->blist;
622 			f->blist = nil;
623 			ipfragfree4(ip, f);
624 			ih = BLKIP(bl);
625 			hnputs(ih->length, len);
626 			qunlock(&ip->fraglock4);
627 			ip->stats[ReasmOKs]++;
628 			return bl;
629 		}
630 		pktposn += BKFG(bl)->flen;
631 	}
632 	qunlock(&ip->fraglock4);
633 	return nil;
634 }
635 
636 /*
637  * ipfragfree4 - Free a list of fragments - assume hold fraglock4
638  */
639 void
ipfragfree4(IP * ip,Fragment4 * frag)640 ipfragfree4(IP *ip, Fragment4 *frag)
641 {
642 	Fragment4 *fl, **l;
643 
644 	if(frag->blist)
645 		freeblist(frag->blist);
646 
647 	frag->src = 0;
648 	frag->id = 0;
649 	frag->blist = nil;
650 
651 	l = &ip->flisthead4;
652 	for(fl = *l; fl; fl = fl->next) {
653 		if(fl == frag) {
654 			*l = frag->next;
655 			break;
656 		}
657 		l = &fl->next;
658 	}
659 
660 	frag->next = ip->fragfree4;
661 	ip->fragfree4 = frag;
662 
663 }
664 
665 /*
666  * ipfragallo4 - allocate a reassembly queue - assume hold fraglock4
667  */
668 Fragment4 *
ipfragallo4(IP * ip)669 ipfragallo4(IP *ip)
670 {
671 	Fragment4 *f;
672 
673 	while(ip->fragfree4 == nil) {
674 		/* free last entry on fraglist */
675 		for(f = ip->flisthead4; f->next; f = f->next)
676 			;
677 		ipfragfree4(ip, f);
678 	}
679 	f = ip->fragfree4;
680 	ip->fragfree4 = f->next;
681 	f->next = ip->flisthead4;
682 	ip->flisthead4 = f;
683 	f->age = NOW + 30000;
684 
685 	return f;
686 }
687 
/*
 * ipcsum - compute the IPv4 header checksum.
 *
 * addr points at the first byte of the header; the header length is
 * taken from the low nibble of that byte (in 32-bit words).  The
 * header is summed as big-endian 16-bit words, the carries are folded
 * back in, and the one's complement of the result is returned.
 * With the checksum field zeroed this yields the value to store;
 * over a header with a correct checksum it yields 0.
 */
ushort
ipcsum(uchar *addr)
{
	int len;
	ulong sum;

	sum = 0;
	len = (addr[0]&0xf)<<2;

	while(len > 0) {
		sum += addr[0]<<8 | addr[1] ;
		len -= 2;
		addr += 2;
	}

	/* fold twice: the first fold can itself produce a carry */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);

	return (sum^0xffff);
}