xref: /netbsd-src/sys/net/if_tun.c (revision 2a399c6883d870daece976daec6ffa7bb7f934ce)
1 /*	$NetBSD: if_tun.c,v 1.32 1997/09/25 13:11:58 matt Exp $	*/
2 
3 /*
4  * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5  * Nottingham University 1987.
6  *
7  * This source may be freely distributed, however I would be interested
8  * in any changes that are made.
9  *
10  * This driver takes packets off the IP i/f and hands them up to a
11  * user process to have its wicked way with. This driver has its
12  * roots in a similar driver written by Phil Cockcroft (formerly) at
13  * UCL. This driver is based much more on read/write/poll mode of
14  * operation though.
15  */
16 
17 #include "tun.h"
18 #if NTUN > 0
19 
20 #include <sys/param.h>
21 #include <sys/proc.h>
22 #include <sys/systm.h>
23 #include <sys/mbuf.h>
24 #include <sys/buf.h>
25 #include <sys/protosw.h>
26 #include <sys/socket.h>
27 #include <sys/ioctl.h>
28 #include <sys/errno.h>
29 #include <sys/syslog.h>
30 #include <sys/select.h>
31 #include <sys/poll.h>
32 #include <sys/file.h>
33 #include <sys/signalvar.h>
34 #include <sys/conf.h>
35 
36 #include <machine/cpu.h>
37 
38 #include <net/if.h>
39 #include <net/if_ether.h>
40 #include <net/netisr.h>
41 #include <net/route.h>
42 
43 
44 #ifdef INET
45 #include <netinet/in.h>
46 #include <netinet/in_systm.h>
47 #include <netinet/in_var.h>
48 #include <netinet/ip.h>
49 #include <netinet/if_inarp.h>
50 #endif
51 
52 #ifdef NS
53 #include <netns/ns.h>
54 #include <netns/ns_if.h>
55 #endif
56 
57 #include "bpfilter.h"
58 #if NBPFILTER > 0
59 #include <sys/time.h>
60 #include <net/bpf.h>
61 #endif
62 
63 #include <net/if_tun.h>
64 
65 #define TUNDEBUG	if (tundebug) printf
66 int	tundebug = 0;
67 
68 struct tun_softc tunctl[NTUN];
69 extern int ifqmaxlen;
70 void	tunattach __P((int));
71 
72 int	tun_ioctl __P((struct ifnet *, u_long, caddr_t));
73 int	tun_output __P((struct ifnet *, struct mbuf *, struct sockaddr *,
74 		       struct rtentry *rt));
75 
76 static void tuninit __P((struct tun_softc *));
77 
78 void
79 tunattach(unused)
80 	int unused;
81 {
82 	register int i;
83 	struct ifnet *ifp;
84 
85 	for (i = 0; i < NTUN; i++) {
86 		tunctl[i].tun_flags = TUN_INITED;
87 
88 		ifp = &tunctl[i].tun_if;
89 		sprintf(ifp->if_xname, "tun%d", i);
90 		ifp->if_softc = &tunctl[i];
91 		ifp->if_mtu = TUNMTU;
92 		ifp->if_ioctl = tun_ioctl;
93 		ifp->if_output = tun_output;
94 		ifp->if_flags = IFF_POINTOPOINT;
95 		ifp->if_snd.ifq_maxlen = ifqmaxlen;
96 		ifp->if_collisions = 0;
97 		ifp->if_ierrors = 0;
98 		ifp->if_oerrors = 0;
99 		ifp->if_ipackets = 0;
100 		ifp->if_opackets = 0;
101 		if_attach(ifp);
102 #if NBPFILTER > 0
103 		bpfattach(&tunctl[i].tun_bpf, ifp, DLT_NULL, sizeof(u_int32_t));
104 #endif
105 	}
106 }
107 
108 /*
109  * tunnel open - must be superuser & the device must be
110  * configured in
111  */
112 int
113 tunopen(dev, flag, mode, p)
114 	dev_t	dev;
115 	int	flag, mode;
116 	struct proc *p;
117 {
118 	struct ifnet	*ifp;
119 	struct tun_softc *tp;
120 	register int	unit, error;
121 
122 	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
123 		return (error);
124 
125 	if ((unit = minor(dev)) >= NTUN)
126 		return (ENXIO);
127 	tp = &tunctl[unit];
128 	if (tp->tun_flags & TUN_OPEN)
129 		return ENXIO;
130 	ifp = &tp->tun_if;
131 	tp->tun_flags |= TUN_OPEN;
132 	TUNDEBUG("%s: open\n", ifp->if_xname);
133 	return (0);
134 }
135 
136 /*
137  * tunclose - close the device - mark i/f down & delete
138  * routing info
139  */
140 int
141 tunclose(dev, flag, mode, p)
142 	dev_t	dev;
143 	int	flag;
144 	int	mode;
145 	struct proc *p;
146 {
147 	register int	unit = minor(dev), s;
148 	struct tun_softc *tp = &tunctl[unit];
149 	struct ifnet	*ifp = &tp->tun_if;
150 	struct mbuf	*m;
151 
152 	tp->tun_flags &= ~TUN_OPEN;
153 
154 	/*
155 	 * junk all pending output
156 	 */
157 	do {
158 		s = splimp();
159 		IF_DEQUEUE(&ifp->if_snd, m);
160 		splx(s);
161 		if (m)
162 			m_freem(m);
163 	} while (m);
164 
165 	if (ifp->if_flags & IFF_UP) {
166 		s = splimp();
167 		if_down(ifp);
168 		if (ifp->if_flags & IFF_RUNNING) {
169 			/* find internet addresses and delete routes */
170 			register struct ifaddr *ifa;
171 			for (ifa = ifp->if_addrlist.tqh_first; ifa != 0;
172 			    ifa = ifa->ifa_list.tqe_next) {
173 				if (ifa->ifa_addr->sa_family == AF_INET) {
174 					rtinit(ifa, (int)RTM_DELETE,
175 					       tp->tun_flags & TUN_DSTADDR
176 							? RTF_HOST
177 							: 0);
178 				}
179 			}
180 		}
181 		splx(s);
182 	}
183 	tp->tun_pgrp = 0;
184 	selwakeup(&tp->tun_rsel);
185 
186 	TUNDEBUG ("%s: closed\n", ifp->if_xname);
187 	return (0);
188 }
189 
190 static void
191 tuninit(tp)
192 	struct tun_softc *tp;
193 {
194 	struct ifnet	*ifp = &tp->tun_if;
195 	register struct ifaddr *ifa;
196 
197 	TUNDEBUG("%s: tuninit\n", ifp->if_xname);
198 
199 	ifp->if_flags |= IFF_UP | IFF_RUNNING;
200 
201 	tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR);
202 	for (ifa = ifp->if_addrlist.tqh_first; ifa != 0;
203 	     ifa = ifa->ifa_list.tqe_next) {
204 		if (ifa->ifa_addr->sa_family == AF_INET) {
205 			struct sockaddr_in *sin;
206 
207 			sin = satosin(ifa->ifa_addr);
208 			if (sin && sin->sin_addr.s_addr)
209 				tp->tun_flags |= TUN_IASET;
210 
211 			if (ifp->if_flags & IFF_POINTOPOINT) {
212 				sin = satosin(ifa->ifa_dstaddr);
213 				if (sin && sin->sin_addr.s_addr)
214 					tp->tun_flags |= TUN_DSTADDR;
215 			}
216 		}
217 	}
218 
219 	return;
220 }
221 
222 /*
223  * Process an ioctl request.
224  */
225 int
226 tun_ioctl(ifp, cmd, data)
227 	struct ifnet *ifp;
228 	u_long cmd;
229 	caddr_t	data;
230 {
231 	int		error = 0, s;
232 
233 	s = splimp();
234 	switch(cmd) {
235 	case SIOCSIFADDR:
236 		tuninit((struct tun_softc *)(ifp->if_softc));
237 		TUNDEBUG("%s: address set\n", ifp->if_xname);
238 		break;
239 	case SIOCSIFDSTADDR:
240 		tuninit((struct tun_softc *)(ifp->if_softc));
241 		TUNDEBUG("%s: destination address set\n", ifp->if_xname);
242 		break;
243 	case SIOCSIFBRDADDR:
244 		TUNDEBUG("%s: broadcast address set\n", ifp->if_xname);
245 		break;
246 	case SIOCSIFMTU: {
247 		struct ifreq *ifr = (struct ifreq *) data;
248 		if (ifr->ifr_mtu > TUNMTU || ifr->ifr_mtu < 576) {
249 		    error = EINVAL;
250 		    break;
251 		}
252 		TUNDEBUG("%s: interface mtu set\n", ifp->if_xname);
253 		ifp->if_mtu = ifr->ifr_mtu;
254 		break;
255 	}
256 	case SIOCADDMULTI:
257 	case SIOCDELMULTI: {
258 		struct ifreq *ifr = (struct ifreq *) data;
259 		if (ifr == 0) {
260 	        	error = EAFNOSUPPORT;           /* XXX */
261 			break;
262 		}
263 		switch (ifr->ifr_addr.sa_family) {
264 
265 #ifdef INET
266 		case AF_INET:
267 			break;
268 #endif
269 
270 		default:
271 			error = EAFNOSUPPORT;
272 			break;
273 		}
274 		break;
275 	}
276 	default:
277 		error = EINVAL;
278 	}
279 	splx(s);
280 	return (error);
281 }
282 
283 /*
284  * tun_output - queue packets from higher level ready to put out.
285  */
286 int
287 tun_output(ifp, m0, dst, rt)
288 	struct ifnet   *ifp;
289 	struct mbuf    *m0;
290 	struct sockaddr *dst;
291 	struct rtentry *rt;
292 {
293 	struct tun_softc *tp = ifp->if_softc;
294 	struct proc	*p;
295 	int		s;
296 
297 	TUNDEBUG ("%s: tun_output\n", ifp->if_xname);
298 
299 	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
300 		TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname,
301 			  tp->tun_flags);
302 		m_freem (m0);
303 		return (EHOSTDOWN);
304 	}
305 
306 #if NBPFILTER > 0
307 	if (tp->tun_bpf) {
308 		/*
309 		 * We need to prepend the address family as
310 		 * a four byte field.  Cons up a dummy header
311 		 * to pacify bpf.  This is safe because bpf
312 		 * will only read from the mbuf (i.e., it won't
313 		 * try to free it or keep a pointer to it).
314 		 */
315 		struct mbuf m;
316 		u_int32_t af = dst->sa_family;
317 
318 		m.m_next = m0;
319 		m.m_len = sizeof(af);
320 		m.m_data = (char *)&af;
321 
322 		bpf_mtap(tp->tun_bpf, &m);
323 	}
324 #endif
325 
326 	switch(dst->sa_family) {
327 #ifdef INET
328 	case AF_INET:
329 		if (tp->tun_flags & TUN_PREPADDR) {
330 			/* Simple link-layer header */
331 			M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
332 			if (m0 == NULL) {
333 				IF_DROP(&ifp->if_snd);
334 				return (ENOBUFS);
335 			}
336 			bcopy(dst, mtod(m0, char *), dst->sa_len);
337 		}
338 
339 		s = splimp();
340 		if (IF_QFULL(&ifp->if_snd)) {
341 			IF_DROP(&ifp->if_snd);
342 			m_freem(m0);
343 			splx(s);
344 			ifp->if_collisions++;
345 			return (ENOBUFS);
346 		}
347 		IF_ENQUEUE(&ifp->if_snd, m0);
348 		splx(s);
349 		ifp->if_opackets++;
350 		break;
351 #endif
352 	default:
353 		m_freem(m0);
354 		return (EAFNOSUPPORT);
355 	}
356 
357 	if (tp->tun_flags & TUN_RWAIT) {
358 		tp->tun_flags &= ~TUN_RWAIT;
359 		wakeup((caddr_t)tp);
360 	}
361 	if (tp->tun_flags & TUN_ASYNC && tp->tun_pgrp) {
362 		if (tp->tun_pgrp > 0)
363 			gsignal(tp->tun_pgrp, SIGIO);
364 		else if ((p = pfind(-tp->tun_pgrp)) != NULL)
365 			psignal(p, SIGIO);
366 	}
367 	selwakeup(&tp->tun_rsel);
368 	return (0);
369 }
370 
371 /*
372  * the cdevsw interface is now pretty minimal.
373  */
374 int
375 tunioctl(dev, cmd, data, flag, p)
376 	dev_t		dev;
377 	u_long		cmd;
378 	caddr_t		data;
379 	int		flag;
380 	struct proc	*p;
381 {
382 	int		unit = minor(dev), s;
383 	struct tun_softc *tp = &tunctl[unit];
384 
385 	switch (cmd) {
386 	case TUNSDEBUG:
387 		tundebug = *(int *)data;
388 		break;
389 
390 	case TUNGDEBUG:
391 		*(int *)data = tundebug;
392 		break;
393 
394 	case TUNSIFMODE:
395 		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
396 		case IFF_POINTOPOINT:
397 		case IFF_BROADCAST:
398 			s = splimp();
399 			if (tp->tun_if.if_flags & IFF_UP) {
400 				splx(s);
401 				return (EBUSY);
402 			}
403 			tp->tun_if.if_flags &=
404 				~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
405 			tp->tun_if.if_flags |= *(int *)data;
406 			splx(s);
407 			break;
408 		default:
409 			return (EINVAL);
410 			break;
411 		}
412 		break;
413 
414 	case TUNSLMODE:
415 		if (*(int *)data)
416 			tp->tun_flags |= TUN_PREPADDR;
417 		else
418 			tp->tun_flags &= ~TUN_PREPADDR;
419 		break;
420 
421 	case FIONBIO:
422 		if (*(int *)data)
423 			tp->tun_flags |= TUN_NBIO;
424 		else
425 			tp->tun_flags &= ~TUN_NBIO;
426 		break;
427 
428 	case FIOASYNC:
429 		if (*(int *)data)
430 			tp->tun_flags |= TUN_ASYNC;
431 		else
432 			tp->tun_flags &= ~TUN_ASYNC;
433 		break;
434 
435 	case FIONREAD:
436 		s = splimp();
437 		if (tp->tun_if.if_snd.ifq_head)
438 			*(int *)data = tp->tun_if.if_snd.ifq_head->m_pkthdr.len;
439 		else
440 			*(int *)data = 0;
441 		splx(s);
442 		break;
443 
444 	case TIOCSPGRP:
445 		tp->tun_pgrp = *(int *)data;
446 		break;
447 
448 	case TIOCGPGRP:
449 		*(int *)data = tp->tun_pgrp;
450 		break;
451 
452 	default:
453 		return (ENOTTY);
454 	}
455 	return (0);
456 }
457 
458 /*
459  * The cdevsw read interface - reads a packet at a time, or at
460  * least as much of a packet as can be read.
461  */
462 int
463 tunread(dev, uio, ioflag)
464 	dev_t		dev;
465 	struct uio	*uio;
466 	int		ioflag;
467 {
468 	int		unit = minor(dev);
469 	struct tun_softc *tp = &tunctl[unit];
470 	struct ifnet	*ifp = &tp->tun_if;
471 	struct mbuf	*m, *m0;
472 	int		error=0, len, s;
473 
474 	TUNDEBUG ("%s: read\n", ifp->if_xname);
475 	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
476 		TUNDEBUG ("%s: not ready 0%o\n", ifp->if_xname, tp->tun_flags);
477 		return EHOSTDOWN;
478 	}
479 
480 	tp->tun_flags &= ~TUN_RWAIT;
481 
482 	s = splimp();
483 	do {
484 		IF_DEQUEUE(&ifp->if_snd, m0);
485 		if (m0 == 0) {
486 			if (tp->tun_flags & TUN_NBIO) {
487 				splx(s);
488 				return (EWOULDBLOCK);
489 			}
490 			tp->tun_flags |= TUN_RWAIT;
491 			if (tsleep((caddr_t)tp, PZERO|PCATCH, "tunread", 0)) {
492 				splx(s);
493 				return (EINTR);
494 			}
495 		}
496 	} while (m0 == 0);
497 	splx(s);
498 
499 	while (m0 && uio->uio_resid > 0 && error == 0) {
500 		len = min(uio->uio_resid, m0->m_len);
501 		if (len == 0)
502 			break;
503 		error = uiomove(mtod(m0, caddr_t), len, uio);
504 		MFREE(m0, m);
505 		m0 = m;
506 	}
507 
508 	if (m0) {
509 		TUNDEBUG("Dropping mbuf\n");
510 		m_freem(m0);
511 	}
512 	if (error)
513 		ifp->if_ierrors++;
514 	return (error);
515 }
516 
517 /*
518  * the cdevsw write interface - an atomic write is a packet - or else!
519  */
520 int
521 tunwrite(dev, uio, ioflag)
522 	dev_t		dev;
523 	struct uio	*uio;
524 	int		ioflag;
525 {
526 	int		unit = minor (dev);
527 	struct tun_softc *tp = &tunctl[unit];
528 	struct ifnet	*ifp = &tp->tun_if;
529 	struct mbuf	*top, **mp, *m;
530 	struct ifqueue	*ifq;
531 	struct sockaddr	dst;
532 	int		isr, error=0, s, tlen, mlen;
533 
534 	TUNDEBUG("%s: tunwrite\n", ifp->if_xname);
535 
536 	if (tp->tun_flags & TUN_PREPADDR) {
537 		if (uio->uio_resid < sizeof(dst))
538 			return (EIO);
539 		error = uiomove((caddr_t)&dst, sizeof(dst), uio);
540 		if (dst.sa_len > sizeof(dst)) {
541 			/* Duh.. */
542 			char discard;
543 			int n = dst.sa_len - sizeof(dst);
544 			while (n--)
545 				if ((error = uiomove(&discard, 1, uio)) != 0)
546 					return (error);
547 		}
548 	} else {
549 #ifdef INET
550 		dst.sa_family = AF_INET;
551 #endif
552 	}
553 
554 	if (uio->uio_resid < 0 || uio->uio_resid > TUNMTU) {
555 		TUNDEBUG("%s: len=%d!\n", ifp->if_xname, uio->uio_resid);
556 		return (EIO);
557 	}
558 
559 	switch (dst.sa_family) {
560 #ifdef INET
561 	case AF_INET:
562 		ifq = &ipintrq;
563 		isr = NETISR_IP;
564 		break;
565 #endif
566 	default:
567 		return (EAFNOSUPPORT);
568 	}
569 
570 	tlen = uio->uio_resid;
571 
572 	/* get a header mbuf */
573 	MGETHDR(m, M_DONTWAIT, MT_DATA);
574 	if (m == NULL)
575 		return (ENOBUFS);
576 	mlen = MHLEN;
577 
578 	top = 0;
579 	mp = &top;
580 	while (error == 0 && uio->uio_resid > 0) {
581 		m->m_len = min(mlen, uio->uio_resid);
582 		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
583 		*mp = m;
584 		mp = &m->m_next;
585 		if (uio->uio_resid > 0) {
586 			MGET (m, M_DONTWAIT, MT_DATA);
587 			if (m == 0) {
588 				error = ENOBUFS;
589 				break;
590 			}
591 			mlen = MLEN;
592 		}
593 	}
594 	if (error) {
595 		if (top)
596 			m_freem (top);
597 		ifp->if_ierrors++;
598 		return (error);
599 	}
600 
601 	top->m_pkthdr.len = tlen;
602 	top->m_pkthdr.rcvif = ifp;
603 
604 #if NBPFILTER > 0
605 	if (tp->tun_bpf) {
606 		/*
607 		 * We need to prepend the address family as
608 		 * a four byte field.  Cons up a dummy header
609 		 * to pacify bpf.  This is safe because bpf
610 		 * will only read from the mbuf (i.e., it won't
611 		 * try to free it or keep a pointer to it).
612 		 */
613 		struct mbuf m;
614 		u_int32_t af = AF_INET;
615 
616 		m.m_next = top;
617 		m.m_len = sizeof(af);
618 		m.m_data = (char *)&af;
619 
620 		bpf_mtap(tp->tun_bpf, &m);
621 	}
622 #endif
623 
624 	s = splimp();
625 	if (IF_QFULL(ifq)) {
626 		IF_DROP(ifq);
627 		splx(s);
628 		ifp->if_collisions++;
629 		m_freem(top);
630 		return (ENOBUFS);
631 	}
632 	IF_ENQUEUE(ifq, top);
633 	splx(s);
634 	ifp->if_ipackets++;
635 	schednetisr(isr);
636 	return (error);
637 }
638 
639 /*
640  * tunpoll - the poll interface, this is only useful on reads
641  * really. The write detect always returns true, write never blocks
642  * anyway, it either accepts the packet or drops it.
643  */
644 int
645 tunpoll(dev, events, p)
646 	dev_t		dev;
647 	int		events;
648 	struct proc	*p;
649 {
650 	int		unit = minor(dev), s;
651 	struct tun_softc *tp = &tunctl[unit];
652 	struct ifnet	*ifp = &tp->tun_if;
653 	int		revents = 0;
654 
655 	s = splimp();
656 	TUNDEBUG("%s: tunpoll\n", ifp->if_xname);
657 
658 	if (events & (POLLIN | POLLRDNORM))
659 		if (ifp->if_snd.ifq_len > 0) {
660 			TUNDEBUG("%s: tunpoll q=%d\n", ifp->if_xname,
661 			    ifp->if_snd.ifq_len);
662 			revents |= events & (POLLIN | POLLRDNORM);
663 		} else {
664 			TUNDEBUG("%s: tunpoll waiting\n", ifp->if_xname);
665 			selrecord(p, &tp->tun_rsel);
666 		}
667 
668 	if (events & (POLLOUT | POLLWRNORM))
669 		revents |= events & (POLLOUT | POLLWRNORM);
670 
671 	splx(s);
672 	return (revents);
673 }
674 
675 #endif  /* NTUN */
676