xref: /dflybsd-src/sys/kern/kern_jail.c (revision 2b3f93ea6d1f70880f3e87f3c2cbe0dc0bfc9332)
1 /*
2  * ----------------------------------------------------------------------------
3  * "THE BEER-WARE LICENSE" (Revision 42):
4  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
5  * can do whatever you want with this stuff. If we meet some day, and you think
6  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
7  * ----------------------------------------------------------------------------
8  *
9  */
10 /*-
11  * Copyright (c) 2006 Victor Balada Diaz <victor@bsdes.net>
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 
37 /*
38  * $FreeBSD: src/sys/kern/kern_jail.c,v 1.6.2.3 2001/08/17 01:00:26 rwatson Exp $
39  */
40 
41 #include "opt_inet6.h"
42 
43 #include <sys/param.h>
44 #include <sys/types.h>
45 #include <sys/kernel.h>
46 #include <sys/systm.h>
47 #include <sys/errno.h>
48 #include <sys/sysmsg.h>
49 #include <sys/malloc.h>
50 #include <sys/nlookup.h>
51 #include <sys/namecache.h>
52 #include <sys/proc.h>
53 #include <sys/caps.h>
54 #include <sys/jail.h>
55 #include <sys/socket.h>
56 #include <sys/sysctl.h>
57 #include <sys/kern_syscall.h>
58 #include <net/if.h>
59 #include <netinet/in.h>
60 #include <netinet6/in6_var.h>
61 
62 static struct prison	*prison_find(int);
63 static void		prison_ipcache_init(struct prison *);
64 
65 __read_mostly static prison_cap_t	prison_default_caps;
66 
67 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
68 
69 SYSCTL_NODE(, OID_AUTO, jail, CTLFLAG_RW, 0,
70     "All jails settings");
71 
72 SYSCTL_NODE(_jail, OID_AUTO, defaults, CTLFLAG_RW, 0,
73     "Default options for jails");
74 
75 /*#define PRISON_DEBUG*/
76 #ifdef PRISON_DEBUG
77 __read_mostly static int prison_debug;
78 SYSCTL_INT(_jail, OID_AUTO, debug, CTLFLAG_RW, &prison_debug, 0,
79     "Debug prison refs");
80 #endif
81 
82 SYSCTL_BIT64(_jail_defaults, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
83     &prison_default_caps, 1, PRISON_CAP_SYS_SET_HOSTNAME,
84     "Processes in jail can set their hostnames");
85 
86 SYSCTL_BIT64(_jail_defaults, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
87     &prison_default_caps, 0, PRISON_CAP_NET_UNIXIPROUTE,
88     "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");
89 
90 SYSCTL_BIT64(_jail_defaults, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
91     &prison_default_caps, 0, PRISON_CAP_SYS_SYSVIPC,
92     "Processes in jail can use System V IPC primitives");
93 
94 SYSCTL_BIT64(_jail_defaults, OID_AUTO, chflags_allowed, CTLFLAG_RW,
95     &prison_default_caps, 0, PRISON_CAP_VFS_CHFLAGS,
96     "Processes in jail can alter system file flags");
97 
98 SYSCTL_BIT64(_jail_defaults, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
99     &prison_default_caps, 0, PRISON_CAP_NET_RAW_SOCKETS,
100     "Process in jail can create raw sockets");
101 
102 SYSCTL_BIT64(_jail_defaults, OID_AUTO, allow_listen_override, CTLFLAG_RW,
103     &prison_default_caps, 0, PRISON_CAP_NET_LISTEN_OVERRIDE,
104     "Process in jail can override host wildcard listen");
105 
106 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_nullfs, CTLFLAG_RW,
107     &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_NULLFS,
108     "Process in jail can mount nullfs(5) filesystems");
109 
110 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_tmpfs, CTLFLAG_RW,
111     &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_TMPFS,
112     "Process in jail can mount tmpfs(5) filesystems");
113 
114 static int	lastprid = 0;
115 static int	prisoncount = 0;
116 
117 static struct lock jail_lock =
118        LOCK_INITIALIZER("jail", 0, LK_CANRECURSE);
119 
120 LIST_HEAD(prisonlist, prison);
121 static struct prisonlist allprison = LIST_HEAD_INITIALIZER(&allprison);
122 
123 static int
124 kern_jail_attach(int jid)
125 {
126 	struct proc *p = curthread->td_proc;
127 	struct prison *pr;
128 	struct ucred *cr;
129 	int error;
130 
131 	pr = prison_find(jid);
132 	if (pr == NULL)
133 		return(EINVAL);
134 
135 	error = kern_chroot(&pr->pr_root);
136 	if (error)
137 		return(error);
138 
139 	prison_hold(pr);
140 	lwkt_gettoken(&p->p_token);
141 	cr = cratom_proc(p);
142 	cr->cr_prison = pr;
143 	p->p_flags |= P_JAILED;
144 	caps_set_locked(p, SYSCAP_RESTRICTEDROOT, __SYSCAP_ALL);
145 	lwkt_reltoken(&p->p_token);
146 
147 	return(0);
148 }
149 
150 static int
151 assign_prison_id(struct prison *pr)
152 {
153 	int tryprid;
154 	struct prison *tpr;
155 
156 	tryprid = lastprid + 1;
157 	if (tryprid == JAIL_MAX)
158 		tryprid = 1;
159 
160 	lockmgr(&jail_lock, LK_EXCLUSIVE);
161 next:
162 	LIST_FOREACH(tpr, &allprison, pr_list) {
163 		if (tpr->pr_id != tryprid)
164 			continue;
165 		tryprid++;
166 		if (tryprid == JAIL_MAX) {
167 			lockmgr(&jail_lock, LK_RELEASE);
168 			return (ERANGE);
169 		}
170 		goto next;
171 	}
172 	pr->pr_id = lastprid = tryprid;
173 	lockmgr(&jail_lock, LK_RELEASE);
174 
175 	return (0);
176 }
177 
178 static int
179 kern_jail(struct prison *pr, struct jail *j)
180 {
181 	int error;
182 	struct nlookupdata nd;
183 
184 	error = nlookup_init(&nd, j->path, UIO_USERSPACE, NLC_FOLLOW);
185 	if (error) {
186 		nlookup_done(&nd);
187 		return (error);
188 	}
189 	error = nlookup(&nd);
190 	if (error) {
191 		nlookup_done(&nd);
192 		return (error);
193 	}
194 	cache_copy(&nd.nl_nch, &pr->pr_root);
195 
196 	varsymset_init(&pr->pr_varsymset, NULL);
197 	prison_ipcache_init(pr);
198 
199 	error = assign_prison_id(pr);
200 	if (error) {
201 		varsymset_clean(&pr->pr_varsymset);
202 		nlookup_done(&nd);
203 		return (error);
204 	}
205 
206 	lockmgr(&jail_lock, LK_EXCLUSIVE);
207 	LIST_INSERT_HEAD(&allprison, pr, pr_list);
208 	++prisoncount;
209 	lockmgr(&jail_lock, LK_RELEASE);
210 
211 	error = prison_sysctl_create(pr);
212 	if (error)
213 		goto out;
214 
215 	error = kern_jail_attach(pr->pr_id);
216 	if (error)
217 		goto out2;
218 
219 	nlookup_done(&nd);
220 	return 0;
221 
222 out2:
223 	prison_sysctl_done(pr);
224 
225 out:
226 	lockmgr(&jail_lock, LK_EXCLUSIVE);
227 	LIST_REMOVE(pr, pr_list);
228 	--prisoncount;
229 	lockmgr(&jail_lock, LK_RELEASE);
230 	varsymset_clean(&pr->pr_varsymset);
231 	nlookup_done(&nd);
232 	return (error);
233 }
234 
235 /*
236  * jail()
237  *
238  * jail_args(syscallarg(struct jail *) jail)
239  *
240  * MPALMOSTSAFE
241  */
242 int
243 sys_jail(struct sysmsg *sysmsg, const struct jail_args *uap)
244 {
245 	struct prison *pr;
246 	struct jail_ip_storage *jip;
247 	struct jail j;
248 	int error;
249 	uint32_t jversion;
250 
251 	sysmsg->sysmsg_result = -1;
252 
253 	error = caps_priv_check_self(SYSCAP_NOJAIL_CREATE);
254 	if (error)
255 		return (error);
256 
257 	error = copyin(uap->jail, &jversion, sizeof(jversion));
258 	if (error)
259 		return (error);
260 
261 	pr = kmalloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
262 	SLIST_INIT(&pr->pr_ips);
263 	lockmgr(&jail_lock, LK_EXCLUSIVE);
264 
265 	switch (jversion) {
266 	case 0:
267 		/* Single IPv4 jails. */
268 		{
269 		struct jail_v0 jv0;
270 		struct sockaddr_in ip4addr;
271 
272 		error = copyin(uap->jail, &jv0, sizeof(jv0));
273 		if (error)
274 			goto out;
275 
276 		j.path = jv0.path;
277 		j.hostname = jv0.hostname;
278 
279 		jip = kmalloc(sizeof(*jip),  M_PRISON, M_WAITOK | M_ZERO);
280 		ip4addr.sin_family = AF_INET;
281 		ip4addr.sin_addr.s_addr = htonl(jv0.ip_number);
282 		memcpy(&jip->ip, &ip4addr, sizeof(ip4addr));
283 		SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries);
284 		break;
285 		}
286 
287 	case 1:
288 		/*
289 		 * DragonFly multi noIP/IPv4/IPv6 jails
290 		 *
291 		 * NOTE: This version is unsupported by FreeBSD
292 		 * (which uses version 2 instead).
293 		 */
294 
295 		error = copyin(uap->jail, &j, sizeof(j));
296 		if (error)
297 			goto out;
298 
299 		for (int i = 0; i < j.n_ips; i++) {
300 			jip = kmalloc(sizeof(*jip), M_PRISON,
301 				      M_WAITOK | M_ZERO);
302 			SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries);
303 			error = copyin(&j.ips[i], &jip->ip,
304 					sizeof(struct sockaddr_storage));
305 			if (error)
306 				goto out;
307 		}
308 		break;
309 	default:
310 		error = EINVAL;
311 		goto out;
312 	}
313 
314 	error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
315 	if (error)
316 		goto out;
317 
318 	/* Use default capabilities as a template */
319 	pr->pr_caps = prison_default_caps;
320 
321 	error = kern_jail(pr, &j);
322 	if (error)
323 		goto out;
324 
325 	sysmsg->sysmsg_result = pr->pr_id;
326 	lockmgr(&jail_lock, LK_RELEASE);
327 
328 	return (0);
329 
330 out:
331 	/* Delete all ips */
332 	while (!SLIST_EMPTY(&pr->pr_ips)) {
333 		jip = SLIST_FIRST(&pr->pr_ips);
334 		SLIST_REMOVE_HEAD(&pr->pr_ips, entries);
335 		kfree(jip, M_PRISON);
336 	}
337 	lockmgr(&jail_lock, LK_RELEASE);
338 	kfree(pr, M_PRISON);
339 
340 	return (error);
341 }
342 
343 /*
344  * int jail_attach(int jid);
345  *
346  * MPALMOSTSAFE
347  */
348 int
349 sys_jail_attach(struct sysmsg *sysmsg, const struct jail_attach_args *uap)
350 {
351 	int error;
352 
353 	error = caps_priv_check_self(SYSCAP_NOJAIL_ATTACH);
354 	if (error)
355 		return(error);
356 	lockmgr(&jail_lock, LK_EXCLUSIVE);
357 	error = kern_jail_attach(uap->jid);
358 	lockmgr(&jail_lock, LK_RELEASE);
359 	return (error);
360 }
361 
362 static void
363 prison_ipcache_init(struct prison *pr)
364 {
365 	struct jail_ip_storage *jis;
366 	struct sockaddr_in *ip4;
367 	struct sockaddr_in6 *ip6;
368 
369 	lockmgr(&jail_lock, LK_EXCLUSIVE);
370 	SLIST_FOREACH(jis, &pr->pr_ips, entries) {
371 		switch (jis->ip.ss_family) {
372 		case AF_INET:
373 			ip4 = (struct sockaddr_in *)&jis->ip;
374 			if ((ntohl(ip4->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) ==
375 			    IN_LOOPBACKNET) {
376 				/* loopback address */
377 				if (pr->local_ip4 == NULL)
378 					pr->local_ip4 = ip4;
379 			} else {
380 				/* public address */
381 				if (pr->nonlocal_ip4 == NULL)
382 					pr->nonlocal_ip4 = ip4;
383 			}
384 			break;
385 
386 		case AF_INET6:
387 			ip6 = (struct sockaddr_in6 *)&jis->ip;
388 			if (IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr)) {
389 				/* loopback address */
390 				if (pr->local_ip6 == NULL)
391 					pr->local_ip6 = ip6;
392 			} else {
393 				/* public address */
394 				if (pr->nonlocal_ip6 == NULL)
395 					pr->nonlocal_ip6 = ip6;
396 			}
397 			break;
398 		}
399 	}
400 	lockmgr(&jail_lock, LK_RELEASE);
401 }
402 
403 /*
404  * Changes INADDR_LOOPBACK for a valid jail address.
405  * ip is in network byte order.
406  * Returns 1 if the ip is among jail valid ips.
407  * Returns 0 if is not among jail valid ips or
408  * if couldn't replace INADDR_LOOPBACK for a valid
409  * IP.
410  */
411 int
412 prison_replace_wildcards(struct thread *td, struct sockaddr *ip)
413 {
414 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
415 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
416 	struct prison *pr;
417 
418 	if (td->td_proc == NULL || td->td_ucred == NULL)
419 		return (1);
420 	if ((pr = td->td_ucred->cr_prison) == NULL)
421 		return (1);
422 
423 	if ((ip->sa_family == AF_INET &&
424 	    ip4->sin_addr.s_addr == htonl(INADDR_ANY)) ||
425 	    (ip->sa_family == AF_INET6 &&
426 	    IN6_IS_ADDR_UNSPECIFIED(&ip6->sin6_addr)))
427 		return (1);
428 	if ((ip->sa_family == AF_INET &&
429 	    ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) ||
430 	    (ip->sa_family == AF_INET6 &&
431 	    IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) {
432 		if (!prison_get_local(pr, ip->sa_family, ip) &&
433 		    !prison_get_nonlocal(pr, ip->sa_family, ip))
434 			return(0);
435 		else
436 			return(1);
437 	}
438 	if (jailed_ip(pr, ip))
439 		return(1);
440 	return(0);
441 }
442 
443 /*
444  * Convert the localhost IP to the actual jail IP
445  */
446 int
447 prison_remote_ip(struct thread *td, struct sockaddr *ip)
448 {
449 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
450 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
451 	struct prison *pr;
452 
453 	if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL)
454 		return(1);
455 	if ((pr = td->td_ucred->cr_prison) == NULL)
456 		return(1);
457 	if ((ip->sa_family == AF_INET &&
458 	    ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) ||
459 	    (ip->sa_family == AF_INET6 &&
460 	    IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) {
461 		if (!prison_get_local(pr, ip->sa_family, ip) &&
462 		    !prison_get_nonlocal(pr, ip->sa_family, ip))
463 			return(0);
464 		else
465 			return(1);
466 	}
467 	return(1);
468 }
469 
470 /*
471  * Convert the jail IP back to localhost
472  *
473  * Used by getsockname() and getpeername() to convert the in-jail loopback
474  * address back to LOCALHOST.  For example, 127.0.0.2 -> 127.0.0.1.  The
475  * idea is that programs running inside the jail should be unaware that they
476  * are using a different loopback IP than the host.
477  */
478 __read_mostly static struct in6_addr sin6_localhost = IN6ADDR_LOOPBACK_INIT;
479 
480 int
481 prison_local_ip(struct thread *td, struct sockaddr *ip)
482 {
483 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
484 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
485 	struct prison *pr;
486 
487 	if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL)
488 		return(1);
489 	if ((pr = td->td_ucred->cr_prison) == NULL)
490 		return(1);
491 	if (ip->sa_family == AF_INET && pr->local_ip4 &&
492 	    pr->local_ip4->sin_addr.s_addr == ip4->sin_addr.s_addr &&
493 	    pr->local_ip4->sin_addr.s_addr != htonl(INADDR_LOOPBACK)) {
494 		ip4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
495 		return(0);
496 	}
497 	if (ip->sa_family == AF_INET6 && pr->local_ip6 &&
498 	    bcmp(&pr->local_ip6->sin6_addr, &ip6->sin6_addr,
499 		 sizeof(ip6->sin6_addr)) == 0) {
500 		bcopy(&sin6_localhost, &ip6->sin6_addr, sizeof(ip6->sin6_addr));
501 		return(0);
502 	}
503 	return(1);
504 }
505 
506 /*
507  * Prison get non loopback ip:
508  * - af is the address family of the ip we want (AF_INET|AF_INET6).
509  * - If ip != NULL, put the first IP address that is not a loopback address
510  *   into *ip.
511  *
512  * ip is in network by order and we don't touch it unless we find a valid ip.
513  * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
514  * or NULL.  This struct may not be modified.
515  */
516 struct sockaddr *
517 prison_get_nonlocal(struct prison *pr, sa_family_t af, struct sockaddr *ip)
518 {
519 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
520 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
521 
522 	/* Check if it is cached */
523 	switch(af) {
524 	case AF_INET:
525 		if (ip4 != NULL && pr->nonlocal_ip4 != NULL)
526 			ip4->sin_addr.s_addr = pr->nonlocal_ip4->sin_addr.s_addr;
527 		return (struct sockaddr *)pr->nonlocal_ip4;
528 
529 	case AF_INET6:
530 		if (ip6 != NULL && pr->nonlocal_ip6 != NULL)
531 			ip6->sin6_addr = pr->nonlocal_ip6->sin6_addr;
532 		return (struct sockaddr *)pr->nonlocal_ip6;
533 	}
534 
535 	/* NOTREACHED */
536 	return NULL;
537 }
538 
539 /*
540  * Prison get loopback ip.
541  * - af is the address family of the ip we want (AF_INET|AF_INET6).
542  * - If ip != NULL, put the first IP address that is not a loopback address
543  *   into *ip.
544  *
545  * ip is in network by order and we don't touch it unless we find a valid ip.
546  * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
547  * or NULL.  This struct may not be modified.
548  */
549 struct sockaddr *
550 prison_get_local(struct prison *pr, sa_family_t af, struct sockaddr *ip)
551 {
552 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
553 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
554 
555 	/* Check if it is cached */
556 	switch(af) {
557 	case AF_INET:
558 		if (ip4 != NULL && pr->local_ip4 != NULL)
559 			ip4->sin_addr.s_addr = pr->local_ip4->sin_addr.s_addr;
560 		return (struct sockaddr *)pr->local_ip4;
561 
562 	case AF_INET6:
563 		if (ip6 != NULL && pr->local_ip6 != NULL)
564 			ip6->sin6_addr = pr->local_ip6->sin6_addr;
565 		return (struct sockaddr *)pr->local_ip6;
566 	}
567 
568 	/* NOTREACHED */
569 	return NULL;
570 }
571 
572 /* Check if the IP is among ours, if it is return 1, else 0 */
573 int
574 jailed_ip(struct prison *pr, const struct sockaddr *ip)
575 {
576 	const struct jail_ip_storage *jis;
577 	const struct sockaddr_in *jip4, *ip4;
578 	const struct sockaddr_in6 *jip6, *ip6;
579 
580 	if (pr == NULL)
581 		return(0);
582 	ip4 = (const struct sockaddr_in *)ip;
583 	ip6 = (const struct sockaddr_in6 *)ip;
584 
585 	lockmgr(&jail_lock, LK_EXCLUSIVE);
586 	SLIST_FOREACH(jis, &pr->pr_ips, entries) {
587 		switch (ip->sa_family) {
588 		case AF_INET:
589 			jip4 = (const struct sockaddr_in *) &jis->ip;
590 			if (jip4->sin_family == AF_INET &&
591 			    ip4->sin_addr.s_addr == jip4->sin_addr.s_addr) {
592 				lockmgr(&jail_lock, LK_RELEASE);
593 				return(1);
594 			}
595 			break;
596 		case AF_INET6:
597 			jip6 = (const struct sockaddr_in6 *) &jis->ip;
598 			if (jip6->sin6_family == AF_INET6 &&
599 			    IN6_ARE_ADDR_EQUAL(&ip6->sin6_addr,
600 				&jip6->sin6_addr)) {
601 				lockmgr(&jail_lock, LK_RELEASE);
602 				return(1);
603 			}
604 			break;
605 		}
606 	}
607 	lockmgr(&jail_lock, LK_RELEASE);
608 	/* Ip not in list */
609 	return(0);
610 }
611 
612 int
613 prison_if(struct ucred *cred, struct sockaddr *sa)
614 {
615 	struct prison *pr;
616 	struct sockaddr_in *sai = (struct sockaddr_in*) sa;
617 
618 	pr = cred->cr_prison;
619 
620 	if (((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6))
621 	    && PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_NET_UNIXIPROUTE))
622 		return(1);
623 	else if ((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6))
624 		return(0);
625 	else if (jailed_ip(pr, sa))
626 		return(0);
627 	return(1);
628 }
629 
630 /*
631  * Returns a prison instance, or NULL on failure.
632  */
633 static struct prison *
634 prison_find(int prid)
635 {
636 	struct prison *pr;
637 
638 	lockmgr(&jail_lock, LK_EXCLUSIVE);
639 	LIST_FOREACH(pr, &allprison, pr_list) {
640 		if (pr->pr_id == prid)
641 			break;
642 	}
643 	lockmgr(&jail_lock, LK_RELEASE);
644 
645 	return(pr);
646 }
647 
648 static int
649 sysctl_jail_list(SYSCTL_HANDLER_ARGS)
650 {
651 	struct thread *td = curthread;
652 	struct jail_ip_storage *jip;
653 #ifdef INET6
654 	struct sockaddr_in6 *jsin6;
655 #endif
656 	struct sockaddr_in *jsin;
657 	struct lwp *lp;
658 	struct prison *pr;
659 	unsigned int jlssize, jlsused;
660 	int count, error;
661 	char *jls; /* Jail list */
662 	char *oip; /* Output ip */
663 	char *fullpath, *freepath;
664 
665 	jlsused = 0;
666 
667 	if (jailed(td->td_ucred))
668 		return (0);
669 	lp = td->td_lwp;
670 retry:
671 	count = prisoncount;
672 
673 	if (count == 0)
674 		return(0);
675 
676 	jlssize = (count * 1024);
677 	jls = kmalloc(jlssize + 1, M_TEMP, M_WAITOK | M_ZERO);
678 	if (count < prisoncount) {
679 		kfree(jls, M_TEMP);
680 		goto retry;
681 	}
682 	count = prisoncount;
683 
684 	lockmgr(&jail_lock, LK_EXCLUSIVE);
685 	LIST_FOREACH(pr, &allprison, pr_list) {
686 		error = cache_fullpath(lp->lwp_proc, &pr->pr_root, NULL,
687 					&fullpath, &freepath, 0);
688 		if (error)
689 			continue;
690 		if (jlsused && jlsused < jlssize)
691 			jls[jlsused++] = '\n';
692 		count = ksnprintf(jls + jlsused, (jlssize - jlsused),
693 				 "%d %s %s",
694 				 pr->pr_id, pr->pr_host, fullpath);
695 		kfree(freepath, M_TEMP);
696 		if (count < 0)
697 			goto end;
698 		jlsused += count;
699 
700 		/* Copy the IPS */
701 		SLIST_FOREACH(jip, &pr->pr_ips, entries) {
702 			char buf[INET_ADDRSTRLEN];
703 
704 			jsin = (struct sockaddr_in *)&jip->ip;
705 
706 			switch(jsin->sin_family) {
707 			case AF_INET:
708 				oip = kinet_ntoa(jsin->sin_addr, buf);
709 				break;
710 #ifdef INET6
711 			case AF_INET6:
712 				jsin6 = (struct sockaddr_in6 *)&jip->ip;
713 				oip = ip6_sprintf(&jsin6->sin6_addr);
714 				break;
715 #endif
716 			default:
717 				oip = "?family?";
718 				break;
719 			}
720 
721 			if ((jlssize - jlsused) < (strlen(oip) + 1)) {
722 				error = ERANGE;
723 				goto end;
724 			}
725 			count = ksnprintf(jls + jlsused, (jlssize - jlsused),
726 					  " %s", oip);
727 			if (count < 0)
728 				goto end;
729 			jlsused += count;
730 		}
731 	}
732 
733 	/*
734 	 * The format is:
735 	 * pr_id <SPC> hostname1 <SPC> PATH1 <SPC> IP1 <SPC> IP2\npr_id...
736 	 */
737 	error = SYSCTL_OUT(req, jls, jlsused);
738 end:
739 	lockmgr(&jail_lock, LK_RELEASE);
740 	kfree(jls, M_TEMP);
741 
742 	return(error);
743 }
744 
745 SYSCTL_OID(_jail, OID_AUTO, list, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
746 	   sysctl_jail_list, "A", "List of active jails");
747 
748 static int
749 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
750 {
751 	int error, injail;
752 
753 	injail = jailed(req->td->td_ucred);
754 	error = SYSCTL_OUT(req, &injail, sizeof(injail));
755 
756 	return (error);
757 }
758 
759 SYSCTL_PROC(_jail, OID_AUTO, jailed,
760 	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NOLOCK, NULL, 0,
761 	    sysctl_jail_jailed, "I", "Process in jail?");
762 
763 /*
764  * MPSAFE
765  */
766 void
767 prison_hold(struct prison *pr)
768 {
769 	atomic_add_int(&pr->pr_ref, 1);
770 #ifdef PRISON_DEBUG
771 	if (prison_debug > 0) {
772 		--prison_debug;
773 		print_backtrace(-1);
774 	}
775 #endif
776 }
777 
778 /*
779  * MPALMOSTSAFE
780  */
781 void
782 prison_free(struct prison *pr)
783 {
784 	struct jail_ip_storage *jls;
785 
786 #ifdef PRISON_DEBUG
787 	if (prison_debug > 0) {
788 		--prison_debug;
789 		print_backtrace(-1);
790 	}
791 #endif
792 	KKASSERT(pr->pr_ref > 0);
793 	if (atomic_fetchadd_int(&pr->pr_ref, -1) != 1)
794 		return;
795 
796 	/*
797 	 * The global jail lock is needed on the last ref to adjust
798 	 * the list.
799 	 */
800 	lockmgr(&jail_lock, LK_EXCLUSIVE);
801 	if (pr->pr_ref) {
802 		lockmgr(&jail_lock, LK_RELEASE);
803 		return;
804 	}
805 	LIST_REMOVE(pr, pr_list);
806 	--prisoncount;
807 
808 	/*
809 	 * Clean up
810 	 */
811 	while (!SLIST_EMPTY(&pr->pr_ips)) {
812 		jls = SLIST_FIRST(&pr->pr_ips);
813 		SLIST_REMOVE_HEAD(&pr->pr_ips, entries);
814 		kfree(jls, M_PRISON);
815 	}
816 	lockmgr(&jail_lock, LK_RELEASE);
817 
818 	if (pr->pr_linux != NULL)
819 		kfree(pr->pr_linux, M_PRISON);
820 	varsymset_clean(&pr->pr_varsymset);
821 
822 	/* Release the sysctl tree */
823 	prison_sysctl_done(pr);
824 
825 	cache_drop(&pr->pr_root);
826 	kfree(pr, M_PRISON);
827 }
828 
829 /*
830  * Check if permisson for a specific privilege is granted within jail.
831  *
832  * MPSAFE
833  */
834 int
835 prison_priv_check(struct ucred *cred, int cap)
836 {
837 	struct prison *pr = cred->cr_prison;
838 
839 	if (!jailed(cred))
840 		return (0);
841 
842 	switch (cap & ~__SYSCAP_XFLAGS) {
843 	case SYSCAP_NOCRED_SETUID:
844 	case SYSCAP_NOCRED_SETGID:
845 	case SYSCAP_NOCRED_SETEUID:
846 	case SYSCAP_NOCRED_SETEGID:
847 	case SYSCAP_NOCRED_SETREUID:
848 	case SYSCAP_NOCRED_SETREGID:
849 	case SYSCAP_NOCRED_SETRESUID:
850 	case SYSCAP_NOCRED_SETRESGID:
851 	case SYSCAP_NOCRED_SETGROUPS:
852 
853 	case SYSCAP_NOVFS_SYSFLAGS:
854 	case SYSCAP_NOVFS_CHOWN:
855 	case SYSCAP_NOVFS_CHMOD:
856 	case SYSCAP_NOVFS_CHROOT:
857 	case SYSCAP_NOVFS_LINK:
858 	case SYSCAP_NOVFS_CHFLAGS_DEV:
859 	case SYSCAP_NOVFS_REVOKE:
860 	case SYSCAP_NOVFS_MKNOD_BAD:
861 	case SYSCAP_NOVFS_MKNOD_WHT:
862 	case SYSCAP_NOVFS_MKNOD_DIR:
863 		return (0);
864 
865 	case SYSCAP_NOMOUNT_NULLFS:
866 		if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_NULLFS))
867 			return (0);
868 		else
869 			return (EPERM);
870 	case SYSCAP_NOMOUNT_DEVFS:
871 		return (EPERM);
872 	case SYSCAP_NOMOUNT_TMPFS:
873 		if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_TMPFS))
874 			return (0);
875 		else
876 			return (EPERM);
877 
878 	case SYSCAP_NOVFS_SETATTR:
879 	case SYSCAP_NOVFS_SETGID:
880 
881 	case SYSCAP_NOPROC_SETRLIMIT:
882 	case SYSCAP_NOPROC_SETLOGIN:
883 
884 	case SYSCAP_NOSYSCTL_WR:
885 
886 	case SYSCAP_NOVARSYM_SYS:
887 
888 	case SYSCAP_NOSETHOSTNAME:
889 
890 	case SYSCAP_NOPROC_TRESPASS:
891 		return (0);
892 
893 	case SYSCAP_NOQUOTA_WR:
894 		return (0);
895 
896 	case SYSCAP_NODEBUG_UNPRIV:
897 		return (0);
898 
899 		/*
900 		 * Allow jailed root to bind reserved ports.
901 		 */
902 	case SYSCAP_NONET_RESPORT:
903 		return (0);
904 
905 
906 		/*
907 		 * Conditionally allow creating raw sockets in jail.
908 		 */
909 	case SYSCAP_NONET_RAW:
910 		if (PRISON_CAP_ISSET(pr->pr_caps,
911 			PRISON_CAP_NET_RAW_SOCKETS))
912 			return (0);
913 		else
914 			return (EPERM);
915 
916 	case SYSCAP_NOVFS_IOCTL:
917 		return (0);
918 
919 	default:
920 
921 		return (EPERM);
922 	}
923 }
924 
925 
926 /*
927  * Create a per-jail sysctl tree to control the prison
928  */
929 int
930 prison_sysctl_create(struct prison *pr)
931 {
932 	char id_str[7];
933 
934 	ksnprintf(id_str, 6, "%d", pr->pr_id);
935 
936 	pr->pr_sysctl_ctx = (struct sysctl_ctx_list *) kmalloc(
937 		sizeof(struct sysctl_ctx_list), M_PRISON, M_WAITOK | M_ZERO);
938 
939 	sysctl_ctx_init(pr->pr_sysctl_ctx);
940 
941 	/* Main jail node */
942 	pr->pr_sysctl_tree = SYSCTL_ADD_NODE(pr->pr_sysctl_ctx,
943 	    SYSCTL_STATIC_CHILDREN(_jail),
944 	    OID_AUTO, id_str, CTLFLAG_RD, 0,
945 	    "Jail specific settings");
946 
947 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
948 	    OID_AUTO, "sys_set_hostname", CTLFLAG_RW,
949 	    &pr->pr_caps, 0, PRISON_CAP_SYS_SET_HOSTNAME,
950 	    "Processes in jail can set their hostnames");
951 
952 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
953 	    OID_AUTO, "sys_sysvipc", CTLFLAG_RW,
954 	    &pr->pr_caps, 0, PRISON_CAP_SYS_SYSVIPC,
955 	    "Processes in jail can use System V IPC primitives");
956 
957 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
958 	    OID_AUTO, "net_unixiproute", CTLFLAG_RW,
959 	    &pr->pr_caps, 0, PRISON_CAP_NET_UNIXIPROUTE,
960 	    "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");
961 
962 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
963 	    OID_AUTO, "net_raw_sockets", CTLFLAG_RW,
964 	    &pr->pr_caps, 0, PRISON_CAP_NET_RAW_SOCKETS,
965 	    "Process in jail can create raw sockets");
966 
967 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
968 	    OID_AUTO, "allow_listen_override", CTLFLAG_RW,
969 	    &pr->pr_caps, 0, PRISON_CAP_NET_LISTEN_OVERRIDE,
970 	    "Process in jail can create raw sockets");
971 
972 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
973 	    OID_AUTO, "vfs_chflags", CTLFLAG_RW,
974 	    &pr->pr_caps, 0, PRISON_CAP_VFS_CHFLAGS,
975 	    "Process in jail can override host wildcard listen");
976 
977 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
978 	    OID_AUTO, "vfs_mount_nullfs", CTLFLAG_RW,
979 	    &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_NULLFS,
980 	    "Processes in jail can mount nullfs(5) filesystems");
981 
982 	SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree),
983 	    OID_AUTO, "vfs_mount_tmpfs", CTLFLAG_RW,
984 	    &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_TMPFS,
985 	    "Processes in jail can mount tmpfs(5) filesystems");
986 
987 	return 0;
988 }
989 
990 int
991 prison_sysctl_done(struct prison *pr)
992 {
993 	if (pr->pr_sysctl_tree) {
994 		sysctl_ctx_free(pr->pr_sysctl_ctx);
995 		kfree(pr->pr_sysctl_ctx, M_PRISON);
996 		pr->pr_sysctl_tree = NULL;
997 	}
998 
999 	return 0;
1000 }
1001