xref: /dflybsd-src/sys/kern/kern_jail.c (revision 086b156cdf18f6fbb398ffa6f62a25f4f158853e)
1 /*
2  * ----------------------------------------------------------------------------
3  * "THE BEER-WARE LICENSE" (Revision 42):
4  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
5  * can do whatever you want with this stuff. If we meet some day, and you think
6  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
7  * ----------------------------------------------------------------------------
8  *
9  */
10 /*-
11  * Copyright (c) 2006 Victor Balada Diaz <victor@bsdes.net>
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 
37 /*
38  * $FreeBSD: src/sys/kern/kern_jail.c,v 1.6.2.3 2001/08/17 01:00:26 rwatson Exp $
39  * $DragonFly: src/sys/kern/kern_jail.c,v 1.19 2008/05/17 18:20:33 dillon Exp $
40  */
41 
42 #include "opt_inet6.h"
43 
44 #include <sys/param.h>
45 #include <sys/types.h>
46 #include <sys/kernel.h>
47 #include <sys/systm.h>
48 #include <sys/errno.h>
49 #include <sys/sysproto.h>
50 #include <sys/malloc.h>
51 #include <sys/nlookup.h>
52 #include <sys/namecache.h>
53 #include <sys/proc.h>
54 #include <sys/priv.h>
55 #include <sys/jail.h>
56 #include <sys/socket.h>
57 #include <sys/kern_syscall.h>
58 #include <net/if.h>
59 #include <netinet/in.h>
60 #include <netinet6/in6_var.h>
61 
/* Forward declarations for helpers defined further down in this file. */
static struct prison	*prison_find(int);
static void		prison_ipcache_init(struct prison *);

/* Malloc type used for prison structures and their per-jail IP entries. */
MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");

/* Root of the "jail" sysctl tree; per-jail nodes are added below it. */
SYSCTL_NODE(, OID_AUTO, jail, CTLFLAG_RW, 0,
    "All jails settings");

/*
 * "jail.defaults" holds the system-wide defaults.  sys_jail() copies the
 * current values into every newly created prison.
 */
SYSCTL_NODE(_jail, OID_AUTO, defaults, CTLFLAG_RW, 0,
    "Default options for jails");

int	jail_set_hostname_allowed = 1;
SYSCTL_INT(_jail_defaults, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
    &jail_set_hostname_allowed, 0,
    "Processes in jail can set their hostnames");

int	jail_socket_unixiproute_only = 1;
SYSCTL_INT(_jail_defaults, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
    &jail_socket_unixiproute_only, 0,
    "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");

int	jail_sysvipc_allowed = 0;
SYSCTL_INT(_jail_defaults, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
    &jail_sysvipc_allowed, 0,
    "Processes in jail can use System V IPC primitives");

int    jail_chflags_allowed = 0;
SYSCTL_INT(_jail_defaults, OID_AUTO, chflags_allowed, CTLFLAG_RW,
    &jail_chflags_allowed, 0,
    "Processes in jail can alter system file flags");

int    jail_allow_raw_sockets = 0;
SYSCTL_INT(_jail_defaults, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
    &jail_allow_raw_sockets, 0,
    "Process in jail can create raw sockets");

/* Id handed out by the most recent successful assign_prison_id() call. */
int	lastprid = 0;
/* Number of prisons currently linked on the allprison list. */
int	prisoncount = 0;

/*
 * Lock taken around accesses to the prison list and the per-prison IP
 * lists in this file.  It is created with LK_CANRECURSE, and callers in
 * this file do re-acquire it while already holding it.
 */
static struct lock jail_lock =
       LOCK_INITIALIZER("jail", 0, LK_CANRECURSE);

/* List of all prisons, linked through pr_list. */
LIST_HEAD(prisonlist, prison);
struct	prisonlist allprison = LIST_HEAD_INITIALIZER(&allprison);
106 
107 static int
108 kern_jail_attach(int jid)
109 {
110 	struct proc *p = curthread->td_proc;
111 	struct prison *pr;
112 	struct ucred *cr;
113 	int error;
114 
115 	pr = prison_find(jid);
116 	if (pr == NULL)
117 		return(EINVAL);
118 
119 	error = kern_chroot(&pr->pr_root);
120 	if (error)
121 		return(error);
122 
123 	prison_hold(pr);
124 	lwkt_gettoken(&p->p_token);
125 	cr = cratom_proc(p);
126 	cr->cr_prison = pr;
127 	p->p_flags |= P_JAILED;
128 	lwkt_reltoken(&p->p_token);
129 
130 	return(0);
131 }
132 
133 static int
134 assign_prison_id(struct prison *pr)
135 {
136 	int tryprid;
137 	struct prison *tpr;
138 
139 	tryprid = lastprid + 1;
140 	if (tryprid == JAIL_MAX)
141 		tryprid = 1;
142 
143 	lockmgr(&jail_lock, LK_EXCLUSIVE);
144 next:
145 	LIST_FOREACH(tpr, &allprison, pr_list) {
146 		if (tpr->pr_id != tryprid)
147 			continue;
148 		tryprid++;
149 		if (tryprid == JAIL_MAX) {
150 			lockmgr(&jail_lock, LK_RELEASE);
151 			return (ERANGE);
152 		}
153 		goto next;
154 	}
155 	pr->pr_id = lastprid = tryprid;
156 	lockmgr(&jail_lock, LK_RELEASE);
157 
158 	return (0);
159 }
160 
161 static int
162 kern_jail(struct prison *pr, struct jail *j)
163 {
164 	int error;
165 	struct nlookupdata nd;
166 
167 	error = nlookup_init(&nd, j->path, UIO_USERSPACE, NLC_FOLLOW);
168 	if (error) {
169 		nlookup_done(&nd);
170 		return (error);
171 	}
172 	error = nlookup(&nd);
173 	if (error) {
174 		nlookup_done(&nd);
175 		return (error);
176 	}
177 	cache_copy(&nd.nl_nch, &pr->pr_root);
178 
179 	varsymset_init(&pr->pr_varsymset, NULL);
180 	prison_ipcache_init(pr);
181 
182 	error = assign_prison_id(pr);
183 	if (error) {
184 		varsymset_clean(&pr->pr_varsymset);
185 		nlookup_done(&nd);
186 		return (error);
187 	}
188 
189 	lockmgr(&jail_lock, LK_EXCLUSIVE);
190 	LIST_INSERT_HEAD(&allprison, pr, pr_list);
191 	++prisoncount;
192 	lockmgr(&jail_lock, LK_RELEASE);
193 
194 	error = prison_sysctl_create(pr);
195 	if (error)
196 		goto out;
197 
198 	error = kern_jail_attach(pr->pr_id);
199 	if (error)
200 		goto out2;
201 
202 	nlookup_done(&nd);
203 	return 0;
204 
205 out2:
206 	prison_sysctl_done(pr);
207 
208 out:
209 	lockmgr(&jail_lock, LK_EXCLUSIVE);
210 	LIST_REMOVE(pr, pr_list);
211 	--prisoncount;
212 	lockmgr(&jail_lock, LK_RELEASE);
213 	varsymset_clean(&pr->pr_varsymset);
214 	nlookup_done(&nd);
215 	return (error);
216 }
217 
218 /*
219  * jail()
220  *
221  * jail_args(syscallarg(struct jail *) jail)
222  *
223  * MPALMOSTSAFE
224  */
225 int
226 sys_jail(struct jail_args *uap)
227 {
228 	struct thread *td = curthread;
229 	struct prison *pr;
230 	struct jail_ip_storage *jip;
231 	struct jail j;
232 	int error;
233 	uint32_t jversion;
234 
235 	uap->sysmsg_result = -1;
236 
237 	error = priv_check(td, PRIV_JAIL_CREATE);
238 	if (error)
239 		return (error);
240 
241 	error = copyin(uap->jail, &jversion, sizeof(jversion));
242 	if (error)
243 		return (error);
244 
245 	pr = kmalloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
246 	SLIST_INIT(&pr->pr_ips);
247 	lockmgr(&jail_lock, LK_EXCLUSIVE);
248 
249 	switch (jversion) {
250 	case 0:
251 		/* Single IPv4 jails. */
252 		{
253 		struct jail_v0 jv0;
254 		struct sockaddr_in ip4addr;
255 
256 		error = copyin(uap->jail, &jv0, sizeof(jv0));
257 		if (error)
258 			goto out;
259 
260 		j.path = jv0.path;
261 		j.hostname = jv0.hostname;
262 
263 		jip = kmalloc(sizeof(*jip),  M_PRISON, M_WAITOK | M_ZERO);
264 		ip4addr.sin_family = AF_INET;
265 		ip4addr.sin_addr.s_addr = htonl(jv0.ip_number);
266 		memcpy(&jip->ip, &ip4addr, sizeof(ip4addr));
267 		SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries);
268 		break;
269 		}
270 
271 	case 1:
272 		/*
273 		 * DragonFly multi noIP/IPv4/IPv6 jails
274 		 *
275 		 * NOTE: This version is unsupported by FreeBSD
276 		 * (which uses version 2 instead).
277 		 */
278 
279 		error = copyin(uap->jail, &j, sizeof(j));
280 		if (error)
281 			goto out;
282 
283 		for (int i = 0; i < j.n_ips; i++) {
284 			jip = kmalloc(sizeof(*jip), M_PRISON,
285 				      M_WAITOK | M_ZERO);
286 			SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries);
287 			error = copyin(&j.ips[i], &jip->ip,
288 					sizeof(struct sockaddr_storage));
289 			if (error)
290 				goto out;
291 		}
292 		break;
293 	default:
294 		error = EINVAL;
295 		goto out;
296 	}
297 
298 	error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
299 	if (error)
300 		goto out;
301 
302 	/* Global settings are the default for all jails */
303 	pr->pr_set_hostname_allowed = jail_set_hostname_allowed;
304 	pr->pr_socket_unixiproute_only = jail_socket_unixiproute_only;
305 	pr->pr_sysvipc_allowed = jail_sysvipc_allowed;
306 	pr->pr_chflags_allowed = jail_chflags_allowed;
307 	pr->pr_allow_raw_sockets = jail_allow_raw_sockets;
308 
309 	error = kern_jail(pr, &j);
310 	if (error)
311 		goto out;
312 
313 	uap->sysmsg_result = pr->pr_id;
314 	lockmgr(&jail_lock, LK_RELEASE);
315 
316 	return (0);
317 
318 out:
319 	/* Delete all ips */
320 	while (!SLIST_EMPTY(&pr->pr_ips)) {
321 		jip = SLIST_FIRST(&pr->pr_ips);
322 		SLIST_REMOVE_HEAD(&pr->pr_ips, entries);
323 		kfree(jip, M_PRISON);
324 	}
325 	lockmgr(&jail_lock, LK_RELEASE);
326 	kfree(pr, M_PRISON);
327 
328 	return (error);
329 }
330 
331 /*
332  * int jail_attach(int jid);
333  *
334  * MPALMOSTSAFE
335  */
336 int
337 sys_jail_attach(struct jail_attach_args *uap)
338 {
339 	struct thread *td = curthread;
340 	int error;
341 
342 	error = priv_check(td, PRIV_JAIL_ATTACH);
343 	if (error)
344 		return(error);
345 	lockmgr(&jail_lock, LK_EXCLUSIVE);
346 	error = kern_jail_attach(uap->jid);
347 	lockmgr(&jail_lock, LK_RELEASE);
348 	return (error);
349 }
350 
351 static void
352 prison_ipcache_init(struct prison *pr)
353 {
354 	struct jail_ip_storage *jis;
355 	struct sockaddr_in *ip4;
356 	struct sockaddr_in6 *ip6;
357 
358 	lockmgr(&jail_lock, LK_EXCLUSIVE);
359 	SLIST_FOREACH(jis, &pr->pr_ips, entries) {
360 		switch (jis->ip.ss_family) {
361 		case AF_INET:
362 			ip4 = (struct sockaddr_in *)&jis->ip;
363 			if ((ntohl(ip4->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) ==
364 			    IN_LOOPBACKNET) {
365 				/* loopback address */
366 				if (pr->local_ip4 == NULL)
367 					pr->local_ip4 = ip4;
368 			} else {
369 				/* public address */
370 				if (pr->nonlocal_ip4 == NULL)
371 					pr->nonlocal_ip4 = ip4;
372 			}
373 			break;
374 
375 		case AF_INET6:
376 			ip6 = (struct sockaddr_in6 *)&jis->ip;
377 			if (IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr)) {
378 				/* loopback address */
379 				if (pr->local_ip6 == NULL)
380 					pr->local_ip6 = ip6;
381 			} else {
382 				/* public address */
383 				if (pr->nonlocal_ip6 == NULL)
384 					pr->nonlocal_ip6 = ip6;
385 			}
386 			break;
387 		}
388 	}
389 	lockmgr(&jail_lock, LK_RELEASE);
390 }
391 
392 /*
393  * Changes INADDR_LOOPBACK for a valid jail address.
394  * ip is in network byte order.
395  * Returns 1 if the ip is among jail valid ips.
396  * Returns 0 if is not among jail valid ips or
397  * if couldn't replace INADDR_LOOPBACK for a valid
398  * IP.
399  */
400 int
401 prison_replace_wildcards(struct thread *td, struct sockaddr *ip)
402 {
403 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
404 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
405 	struct prison *pr;
406 
407 	if (td->td_proc == NULL || td->td_ucred == NULL)
408 		return (1);
409 	if ((pr = td->td_ucred->cr_prison) == NULL)
410 		return (1);
411 
412 	if ((ip->sa_family == AF_INET &&
413 	    ip4->sin_addr.s_addr == htonl(INADDR_ANY)) ||
414 	    (ip->sa_family == AF_INET6 &&
415 	    IN6_IS_ADDR_UNSPECIFIED(&ip6->sin6_addr)))
416 		return (1);
417 	if ((ip->sa_family == AF_INET &&
418 	    ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) ||
419 	    (ip->sa_family == AF_INET6 &&
420 	    IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) {
421 		if (!prison_get_local(pr, ip->sa_family, ip) &&
422 		    !prison_get_nonlocal(pr, ip->sa_family, ip))
423 			return(0);
424 		else
425 			return(1);
426 	}
427 	if (jailed_ip(pr, ip))
428 		return(1);
429 	return(0);
430 }
431 
432 int
433 prison_remote_ip(struct thread *td, struct sockaddr *ip)
434 {
435 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
436 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
437 	struct prison *pr;
438 
439 	if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL)
440 		return(1);
441 	if ((pr = td->td_ucred->cr_prison) == NULL)
442 		return(1);
443 	if ((ip->sa_family == AF_INET &&
444 	    ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) ||
445 	    (ip->sa_family == AF_INET6 &&
446 	    IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) {
447 		if (!prison_get_local(pr, ip->sa_family, ip) &&
448 		    !prison_get_nonlocal(pr, ip->sa_family, ip))
449 			return(0);
450 		else
451 			return(1);
452 	}
453 	return(1);
454 }
455 
456 /*
457  * Prison get non loopback ip:
458  * - af is the address family of the ip we want (AF_INET|AF_INET6).
459  * - If ip != NULL, put the first IP address that is not a loopback address
460  *   into *ip.
461  *
462  * ip is in network by order and we don't touch it unless we find a valid ip.
463  * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
464  * or NULL.  This struct may not be modified.
465  */
466 struct sockaddr *
467 prison_get_nonlocal(struct prison *pr, sa_family_t af, struct sockaddr *ip)
468 {
469 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
470 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
471 
472 	/* Check if it is cached */
473 	switch(af) {
474 	case AF_INET:
475 		if (ip4 != NULL && pr->nonlocal_ip4 != NULL)
476 			ip4->sin_addr.s_addr = pr->nonlocal_ip4->sin_addr.s_addr;
477 		return (struct sockaddr *)pr->nonlocal_ip4;
478 
479 	case AF_INET6:
480 		if (ip6 != NULL && pr->nonlocal_ip6 != NULL)
481 			ip6->sin6_addr = pr->nonlocal_ip6->sin6_addr;
482 		return (struct sockaddr *)pr->nonlocal_ip6;
483 	}
484 
485 	/* NOTREACHED */
486 	return NULL;
487 }
488 
489 /*
490  * Prison get loopback ip.
491  * - af is the address family of the ip we want (AF_INET|AF_INET6).
492  * - If ip != NULL, put the first IP address that is not a loopback address
493  *   into *ip.
494  *
495  * ip is in network by order and we don't touch it unless we find a valid ip.
496  * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
497  * or NULL.  This struct may not be modified.
498  */
499 struct sockaddr *
500 prison_get_local(struct prison *pr, sa_family_t af, struct sockaddr *ip)
501 {
502 	struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
503 	struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
504 
505 	/* Check if it is cached */
506 	switch(af) {
507 	case AF_INET:
508 		if (ip4 != NULL && pr->local_ip4 != NULL)
509 			ip4->sin_addr.s_addr = pr->local_ip4->sin_addr.s_addr;
510 		return (struct sockaddr *)pr->local_ip4;
511 
512 	case AF_INET6:
513 		if (ip6 != NULL && pr->local_ip6 != NULL)
514 			ip6->sin6_addr = pr->local_ip6->sin6_addr;
515 		return (struct sockaddr *)pr->local_ip6;
516 	}
517 
518 	/* NOTREACHED */
519 	return NULL;
520 }
521 
522 /* Check if the IP is among ours, if it is return 1, else 0 */
523 int
524 jailed_ip(struct prison *pr, struct sockaddr *ip)
525 {
526 	struct jail_ip_storage *jis;
527 	struct sockaddr_in *jip4, *ip4;
528 	struct sockaddr_in6 *jip6, *ip6;
529 
530 	if (pr == NULL)
531 		return(0);
532 	ip4 = (struct sockaddr_in *)ip;
533 	ip6 = (struct sockaddr_in6 *)ip;
534 
535 	lockmgr(&jail_lock, LK_EXCLUSIVE);
536 	SLIST_FOREACH(jis, &pr->pr_ips, entries) {
537 		switch (ip->sa_family) {
538 		case AF_INET:
539 			jip4 = (struct sockaddr_in *) &jis->ip;
540 			if (jip4->sin_family == AF_INET &&
541 			    ip4->sin_addr.s_addr == jip4->sin_addr.s_addr) {
542 				lockmgr(&jail_lock, LK_RELEASE);
543 				return(1);
544 			}
545 			break;
546 		case AF_INET6:
547 			jip6 = (struct sockaddr_in6 *) &jis->ip;
548 			if (jip6->sin6_family == AF_INET6 &&
549 			    IN6_ARE_ADDR_EQUAL(&ip6->sin6_addr,
550 				&jip6->sin6_addr)) {
551 				lockmgr(&jail_lock, LK_RELEASE);
552 				return(1);
553 			}
554 			break;
555 		}
556 	}
557 	lockmgr(&jail_lock, LK_RELEASE);
558 	/* Ip not in list */
559 	return(0);
560 }
561 
562 int
563 prison_if(struct ucred *cred, struct sockaddr *sa)
564 {
565 	struct prison *pr;
566 	struct sockaddr_in *sai = (struct sockaddr_in*) sa;
567 
568 	pr = cred->cr_prison;
569 
570 	if (((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6))
571 	    && jail_socket_unixiproute_only)
572 		return(1);
573 	else if ((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6))
574 		return(0);
575 	else if (jailed_ip(pr, sa))
576 		return(0);
577 	return(1);
578 }
579 
580 /*
581  * Returns a prison instance, or NULL on failure.
582  */
583 static struct prison *
584 prison_find(int prid)
585 {
586 	struct prison *pr;
587 
588 	lockmgr(&jail_lock, LK_EXCLUSIVE);
589 	LIST_FOREACH(pr, &allprison, pr_list) {
590 		if (pr->pr_id == prid)
591 			break;
592 	}
593 	lockmgr(&jail_lock, LK_RELEASE);
594 
595 	return(pr);
596 }
597 
598 static int
599 sysctl_jail_list(SYSCTL_HANDLER_ARGS)
600 {
601 	struct thread *td = curthread;
602 	struct jail_ip_storage *jip;
603 #ifdef INET6
604 	struct sockaddr_in6 *jsin6;
605 #endif
606 	struct sockaddr_in *jsin;
607 	struct lwp *lp;
608 	struct prison *pr;
609 	unsigned int jlssize, jlsused;
610 	int count, error;
611 	char *jls; /* Jail list */
612 	char *oip; /* Output ip */
613 	char *fullpath, *freepath;
614 
615 	jlsused = 0;
616 
617 	if (jailed(td->td_ucred))
618 		return (0);
619 	lp = td->td_lwp;
620 retry:
621 	count = prisoncount;
622 
623 	if (count == 0)
624 		return(0);
625 
626 	jlssize = (count * 1024);
627 	jls = kmalloc(jlssize + 1, M_TEMP, M_WAITOK | M_ZERO);
628 	if (count < prisoncount) {
629 		kfree(jls, M_TEMP);
630 		goto retry;
631 	}
632 	count = prisoncount;
633 
634 	lockmgr(&jail_lock, LK_EXCLUSIVE);
635 	LIST_FOREACH(pr, &allprison, pr_list) {
636 		error = cache_fullpath(lp->lwp_proc, &pr->pr_root, NULL,
637 					&fullpath, &freepath, 0);
638 		if (error)
639 			continue;
640 		if (jlsused && jlsused < jlssize)
641 			jls[jlsused++] = '\n';
642 		count = ksnprintf(jls + jlsused, (jlssize - jlsused),
643 				 "%d %s %s",
644 				 pr->pr_id, pr->pr_host, fullpath);
645 		kfree(freepath, M_TEMP);
646 		if (count < 0)
647 			goto end;
648 		jlsused += count;
649 
650 		/* Copy the IPS */
651 		SLIST_FOREACH(jip, &pr->pr_ips, entries) {
652 			char buf[INET_ADDRSTRLEN];
653 
654 			jsin = (struct sockaddr_in *)&jip->ip;
655 
656 			switch(jsin->sin_family) {
657 			case AF_INET:
658 				oip = kinet_ntoa(jsin->sin_addr, buf);
659 				break;
660 #ifdef INET6
661 			case AF_INET6:
662 				jsin6 = (struct sockaddr_in6 *)&jip->ip;
663 				oip = ip6_sprintf(&jsin6->sin6_addr);
664 				break;
665 #endif
666 			default:
667 				oip = "?family?";
668 				break;
669 			}
670 
671 			if ((jlssize - jlsused) < (strlen(oip) + 1)) {
672 				error = ERANGE;
673 				goto end;
674 			}
675 			count = ksnprintf(jls + jlsused, (jlssize - jlsused),
676 					  " %s", oip);
677 			if (count < 0)
678 				goto end;
679 			jlsused += count;
680 		}
681 	}
682 
683 	/*
684 	 * The format is:
685 	 * pr_id <SPC> hostname1 <SPC> PATH1 <SPC> IP1 <SPC> IP2\npr_id...
686 	 */
687 	error = SYSCTL_OUT(req, jls, jlsused);
688 end:
689 	lockmgr(&jail_lock, LK_RELEASE);
690 	kfree(jls, M_TEMP);
691 
692 	return(error);
693 }
694 
695 SYSCTL_OID(_jail, OID_AUTO, list, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
696 	   sysctl_jail_list, "A", "List of active jails");
697 
698 static int
699 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
700 {
701 	int error, injail;
702 
703 	injail = jailed(req->td->td_ucred);
704 	error = SYSCTL_OUT(req, &injail, sizeof(injail));
705 
706 	return (error);
707 }
708 
709 SYSCTL_PROC(_jail, OID_AUTO, jailed,
710 	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NOLOCK, NULL, 0,
711 	    sysctl_jail_jailed, "I", "Process in jail?");
712 
713 /*
714  * MPSAFE
715  */
716 void
717 prison_hold(struct prison *pr)
718 {
719 	atomic_add_int(&pr->pr_ref, 1);
720 }
721 
722 /*
723  * MPALMOSTSAFE
724  */
725 void
726 prison_free(struct prison *pr)
727 {
728 	struct jail_ip_storage *jls;
729 
730 	KKASSERT(pr->pr_ref > 0);
731 	if (atomic_fetchadd_int(&pr->pr_ref, -1) != 1)
732 		return;
733 
734 	/*
735 	 * The MP lock is needed on the last ref to adjust
736 	 * the list.
737 	 */
738 	lockmgr(&jail_lock, LK_EXCLUSIVE);
739 	if (pr->pr_ref) {
740 		lockmgr(&jail_lock, LK_RELEASE);
741 		return;
742 	}
743 	LIST_REMOVE(pr, pr_list);
744 	--prisoncount;
745 
746 	/*
747 	 * Clean up
748 	 */
749 	while (!SLIST_EMPTY(&pr->pr_ips)) {
750 		jls = SLIST_FIRST(&pr->pr_ips);
751 		SLIST_REMOVE_HEAD(&pr->pr_ips, entries);
752 		kfree(jls, M_PRISON);
753 	}
754 	lockmgr(&jail_lock, LK_RELEASE);
755 
756 	if (pr->pr_linux != NULL)
757 		kfree(pr->pr_linux, M_PRISON);
758 	varsymset_clean(&pr->pr_varsymset);
759 
760 	/* Release the sysctl tree */
761 	prison_sysctl_done(pr);
762 
763 	cache_drop(&pr->pr_root);
764 	kfree(pr, M_PRISON);
765 }
766 
767 /*
768  * Check if permisson for a specific privilege is granted within jail.
769  *
770  * MPSAFE
771  */
772 int
773 prison_priv_check(struct ucred *cred, int priv)
774 {
775 	struct prison *pr = cred->cr_prison;
776 
777 	if (!jailed(cred))
778 		return (0);
779 
780 	switch (priv) {
781 	case PRIV_CRED_SETUID:
782 	case PRIV_CRED_SETEUID:
783 	case PRIV_CRED_SETGID:
784 	case PRIV_CRED_SETEGID:
785 	case PRIV_CRED_SETGROUPS:
786 	case PRIV_CRED_SETREUID:
787 	case PRIV_CRED_SETREGID:
788 	case PRIV_CRED_SETRESUID:
789 	case PRIV_CRED_SETRESGID:
790 
791 	case PRIV_VFS_SYSFLAGS:
792 	case PRIV_VFS_CHOWN:
793 	case PRIV_VFS_CHMOD:
794 	case PRIV_VFS_CHROOT:
795 	case PRIV_VFS_LINK:
796 	case PRIV_VFS_CHFLAGS_DEV:
797 	case PRIV_VFS_REVOKE:
798 	case PRIV_VFS_MKNOD_BAD:
799 	case PRIV_VFS_MKNOD_WHT:
800 	case PRIV_VFS_MKNOD_DIR:
801 	case PRIV_VFS_SETATTR:
802 	case PRIV_VFS_SETGID:
803 
804 	case PRIV_PROC_SETRLIMIT:
805 	case PRIV_PROC_SETLOGIN:
806 
807 	case PRIV_SYSCTL_WRITEJAIL:
808 
809 	case PRIV_VARSYM_SYS:
810 
811 	case PRIV_SETHOSTNAME:
812 
813 	case PRIV_PROC_TRESPASS:
814 
815 		return (0);
816 
817 	case PRIV_UFS_QUOTAON:
818 	case PRIV_UFS_QUOTAOFF:
819 	case PRIV_VFS_SETQUOTA:
820 	case PRIV_UFS_SETUSE:
821 	case PRIV_VFS_GETQUOTA:
822 		return (0);
823 
824 
825 	case PRIV_DEBUG_UNPRIV:
826 		return (0);
827 
828 
829 		/*
830 		 * Allow jailed root to bind reserved ports.
831 		 */
832 	case PRIV_NETINET_RESERVEDPORT:
833 		return (0);
834 
835 
836 		/*
837 		 * Conditionally allow creating raw sockets in jail.
838 		 */
839 	case PRIV_NETINET_RAW:
840 		if (pr->pr_allow_raw_sockets)
841 			return (0);
842 		else
843 			return (EPERM);
844 
845 	case PRIV_HAMMER_IOCTL:
846 		return (0);
847 
848 	default:
849 
850 		return (EPERM);
851 	}
852 }
853 
854 static int
855 sysctl_prison_switch_int8(SYSCTL_HANDLER_ARGS, int8_t *val)
856 {
857 	int new_val;
858 	int error;
859 
860 	new_val = *val;
861 
862 	error = sysctl_handle_int(oidp, &new_val, 0, req);
863 	if (error != 0 || req->newptr == NULL)
864 		return error;
865 
866 	if (new_val != 0 && new_val != 1)
867 		return EINVAL;
868 
869 	*val = new_val;
870 
871 	return error;
872 
873 }
874 
875 static int
876 sysctl_prison_set_hostname_allowed(SYSCTL_HANDLER_ARGS)
877 {
878 	struct prison *pr;
879 	int error;
880 
881 	if (arg1 == NULL)
882 		return EINVAL;
883 	pr = arg1;
884 
885 	error = sysctl_prison_switch_int8(oidp, arg1, arg2, req,
886 	    &pr->pr_set_hostname_allowed);
887 
888 	return error;
889 }
890 
891 static int
892 sysctl_prison_socket_unixiproute_only(SYSCTL_HANDLER_ARGS)
893 {
894 	struct prison *pr;
895 	int error;
896 
897 	if (arg1 == NULL)
898 		return EINVAL;
899 	pr = arg1;
900 
901 	error = sysctl_prison_switch_int8(oidp, arg1, arg2, req,
902 	    &pr->pr_socket_unixiproute_only);
903 
904 	return error;
905 }
906 
907 static int
908 sysctl_prison_sysvipc_allowed(SYSCTL_HANDLER_ARGS)
909 {
910 	struct prison *pr;
911 	int error;
912 
913 	if (arg1 == NULL)
914 		return EINVAL;
915 	pr = arg1;
916 
917 	error = sysctl_prison_switch_int8(oidp, arg1, arg2, req,
918 	    &pr->pr_sysvipc_allowed);
919 
920 	return error;
921 }
922 
923 static int
924 sysctl_prison_chflags_allowed(SYSCTL_HANDLER_ARGS)
925 {
926 	struct prison *pr;
927 	int error;
928 
929 	if (arg1 == NULL)
930 		return EINVAL;
931 	pr = arg1;
932 
933 	error = sysctl_prison_switch_int8(oidp, arg1, arg2, req,
934 	    &pr->pr_chflags_allowed);
935 
936 	return error;
937 }
938 
939 static int
940 sysctl_prison_allow_raw_sockets(SYSCTL_HANDLER_ARGS)
941 {
942 	struct prison *pr;
943 	int error;
944 
945 	if (arg1 == NULL)
946 		return EINVAL;
947 	pr = arg1;
948 
949 	error = sysctl_prison_switch_int8(oidp, arg1, arg2, req,
950 	    &pr->pr_allow_raw_sockets);
951 
952 	return error;
953 }
954 
955 /*
956  * Create a per-jail sysctl tree to control the prison
957  */
958 int
959 prison_sysctl_create(struct prison *pr)
960 {
961 	char id_str[7];
962 
963 	ksnprintf(id_str, 6, "%d", pr->pr_id);
964 
965 	pr->pr_sysctl_ctx = (struct sysctl_ctx_list *) kmalloc(
966 		sizeof(struct sysctl_ctx_list), M_TEMP, M_WAITOK | M_ZERO);
967 
968 	sysctl_ctx_init(pr->pr_sysctl_ctx);
969 
970 	/* Main jail node */
971 	pr->pr_sysctl_tree = SYSCTL_ADD_NODE(pr->pr_sysctl_ctx,
972 	    SYSCTL_STATIC_CHILDREN(_jail),
973 	    OID_AUTO, id_str, CTLFLAG_RD, 0,
974 	    "Jail specific settings");
975 
976 	SYSCTL_ADD_PROC(pr->pr_sysctl_ctx,
977 	    SYSCTL_CHILDREN(pr->pr_sysctl_tree), OID_AUTO,
978 	    "set_hostname_allowed", CTLTYPE_INT | CTLFLAG_RW,
979 	    pr, sizeof(pr->pr_set_hostname_allowed),
980 	    sysctl_prison_set_hostname_allowed, "I",
981 	    "Processes in jail can set their hostnames");
982 	SYSCTL_ADD_PROC(pr->pr_sysctl_ctx,
983 	    SYSCTL_CHILDREN(pr->pr_sysctl_tree), OID_AUTO,
984 	    "socket_unixiproute_only", CTLTYPE_INT | CTLFLAG_RW,
985 	    pr, sizeof(pr->pr_socket_unixiproute_only),
986 	    sysctl_prison_socket_unixiproute_only, "I",
987 	    "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");
988 	SYSCTL_ADD_PROC(pr->pr_sysctl_ctx,
989 	    SYSCTL_CHILDREN(pr->pr_sysctl_tree), OID_AUTO,
990 	    "sysvipc_allowed", CTLTYPE_INT | CTLFLAG_RW,
991 	    pr, sizeof(pr->pr_sysvipc_allowed),
992 	    sysctl_prison_sysvipc_allowed, "I",
993 	    "Processes in jail can use System V IPC primitives");
994 	SYSCTL_ADD_PROC(pr->pr_sysctl_ctx,
995 	    SYSCTL_CHILDREN(pr->pr_sysctl_tree), OID_AUTO,
996 	    "chflags_allowed", CTLTYPE_INT | CTLFLAG_RW,
997 	    pr, sizeof(pr->pr_chflags_allowed),
998 	    sysctl_prison_chflags_allowed, "I",
999 	    "Processes in jail can alter system file flags");
1000 	SYSCTL_ADD_PROC(pr->pr_sysctl_ctx,
1001 	    SYSCTL_CHILDREN(pr->pr_sysctl_tree), OID_AUTO,
1002 	    "allow_raw_sockets", CTLTYPE_INT | CTLFLAG_RW,
1003 	    pr, sizeof(pr->pr_allow_raw_sockets),
1004 	    sysctl_prison_allow_raw_sockets, "I",
1005 	    "Process in jail can create raw sockets");
1006 
1007 	return 0;
1008 }
1009 
1010 int
1011 prison_sysctl_done(struct prison *pr)
1012 {
1013 	if (pr->pr_sysctl_tree) {
1014 		sysctl_ctx_free(pr->pr_sysctl_ctx);
1015 		kfree(pr->pr_sysctl_ctx, M_TEMP);
1016 		pr->pr_sysctl_tree = NULL;
1017 	}
1018 
1019 	return 0;
1020 }
1021