1 /* 2 * ---------------------------------------------------------------------------- 3 * "THE BEER-WARE LICENSE" (Revision 42): 4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 5 * can do whatever you want with this stuff. If we meet some day, and you think 6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 7 * ---------------------------------------------------------------------------- 8 * 9 */ 10 /*- 11 * Copyright (c) 2006 Victor Balada Diaz <victor@bsdes.net> 12 * All rights reserved. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
 */


/*
 * $FreeBSD: src/sys/kern/kern_jail.c,v 1.6.2.3 2001/08/17 01:00:26 rwatson Exp $
 */

#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/sysmsg.h>
#include <sys/malloc.h>
#include <sys/nlookup.h>
#include <sys/namecache.h>
#include <sys/proc.h>
#include <sys/caps.h>
#include <sys/jail.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/kern_syscall.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet6/in6_var.h>

static struct prison *prison_find(int);
static void prison_ipcache_init(struct prison *);

/*
 * Capability bits copied into pr_caps of every newly created prison
 * (see sys_jail()).  The individual bits are exported below under
 * the jail.defaults sysctl node.
 */
__read_mostly static prison_cap_t	prison_default_caps;

MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");

/* Root sysctl node: jail.* */
SYSCTL_NODE(, OID_AUTO, jail, CTLFLAG_RW, 0,
    "All jails settings");

/* jail.defaults.*: one entry per bit of prison_default_caps */
SYSCTL_NODE(_jail, OID_AUTO, defaults, CTLFLAG_RW, 0,
    "Default options for jails");

/*
 * Uncomment to compile in the jail.debug sysctl.  While prison_debug
 * is positive, prison_hold() and prison_free() each decrement it and
 * print a backtrace.
 */
/*#define PRISON_DEBUG*/
#ifdef PRISON_DEBUG
__read_mostly static int prison_debug;
SYSCTL_INT(_jail, OID_AUTO, debug, CTLFLAG_RW, &prison_debug, 0,
    "Debug prison refs");
#endif

SYSCTL_BIT64(_jail_defaults, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
    &prison_default_caps, 1, PRISON_CAP_SYS_SET_HOSTNAME,
    "Processes in jail can set their hostnames");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_NET_UNIXIPROUTE,
    "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_SYS_SYSVIPC,
    "Processes in jail can use System V IPC primitives");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, chflags_allowed, CTLFLAG_RW,
    &prison_default_caps, 0, PRISON_CAP_VFS_CHFLAGS,
    "Processes in jail can alter system file flags");

SYSCTL_BIT64(_jail_defaults, OID_AUTO, allow_raw_sockets, CTLFLAG_RW, 99 &prison_default_caps, 0, PRISON_CAP_NET_RAW_SOCKETS, 100 "Process in jail can create raw sockets"); 101 102 SYSCTL_BIT64(_jail_defaults, OID_AUTO, allow_listen_override, CTLFLAG_RW, 103 &prison_default_caps, 0, PRISON_CAP_NET_LISTEN_OVERRIDE, 104 "Process in jail can override host wildcard listen"); 105 106 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_nullfs, CTLFLAG_RW, 107 &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_NULLFS, 108 "Process in jail can mount nullfs(5) filesystems"); 109 110 SYSCTL_BIT64(_jail_defaults, OID_AUTO, vfs_mount_tmpfs, CTLFLAG_RW, 111 &prison_default_caps, 0, PRISON_CAP_VFS_MOUNT_TMPFS, 112 "Process in jail can mount tmpfs(5) filesystems"); 113 114 static int lastprid = 0; 115 static int prisoncount = 0; 116 117 static struct lock jail_lock = 118 LOCK_INITIALIZER("jail", 0, LK_CANRECURSE); 119 120 LIST_HEAD(prisonlist, prison); 121 static struct prisonlist allprison = LIST_HEAD_INITIALIZER(&allprison); 122 123 static int 124 kern_jail_attach(int jid) 125 { 126 struct proc *p = curthread->td_proc; 127 struct prison *pr; 128 struct ucred *cr; 129 int error; 130 131 pr = prison_find(jid); 132 if (pr == NULL) 133 return(EINVAL); 134 135 error = kern_chroot(&pr->pr_root); 136 if (error) 137 return(error); 138 139 prison_hold(pr); 140 lwkt_gettoken(&p->p_token); 141 cr = cratom_proc(p); 142 cr->cr_prison = pr; 143 p->p_flags |= P_JAILED; 144 caps_set_locked(p, SYSCAP_RESTRICTEDROOT, __SYSCAP_ALL); 145 lwkt_reltoken(&p->p_token); 146 147 return(0); 148 } 149 150 static int 151 assign_prison_id(struct prison *pr) 152 { 153 int tryprid; 154 struct prison *tpr; 155 156 tryprid = lastprid + 1; 157 if (tryprid == JAIL_MAX) 158 tryprid = 1; 159 160 lockmgr(&jail_lock, LK_EXCLUSIVE); 161 next: 162 LIST_FOREACH(tpr, &allprison, pr_list) { 163 if (tpr->pr_id != tryprid) 164 continue; 165 tryprid++; 166 if (tryprid == JAIL_MAX) { 167 lockmgr(&jail_lock, LK_RELEASE); 168 return 
(ERANGE); 169 } 170 goto next; 171 } 172 pr->pr_id = lastprid = tryprid; 173 lockmgr(&jail_lock, LK_RELEASE); 174 175 return (0); 176 } 177 178 static int 179 kern_jail(struct prison *pr, struct jail *j) 180 { 181 int error; 182 struct nlookupdata nd; 183 184 error = nlookup_init(&nd, j->path, UIO_USERSPACE, NLC_FOLLOW); 185 if (error) { 186 nlookup_done(&nd); 187 return (error); 188 } 189 error = nlookup(&nd); 190 if (error) { 191 nlookup_done(&nd); 192 return (error); 193 } 194 cache_copy(&nd.nl_nch, &pr->pr_root); 195 196 varsymset_init(&pr->pr_varsymset, NULL); 197 prison_ipcache_init(pr); 198 199 error = assign_prison_id(pr); 200 if (error) { 201 varsymset_clean(&pr->pr_varsymset); 202 nlookup_done(&nd); 203 return (error); 204 } 205 206 lockmgr(&jail_lock, LK_EXCLUSIVE); 207 LIST_INSERT_HEAD(&allprison, pr, pr_list); 208 ++prisoncount; 209 lockmgr(&jail_lock, LK_RELEASE); 210 211 error = prison_sysctl_create(pr); 212 if (error) 213 goto out; 214 215 error = kern_jail_attach(pr->pr_id); 216 if (error) 217 goto out2; 218 219 nlookup_done(&nd); 220 return 0; 221 222 out2: 223 prison_sysctl_done(pr); 224 225 out: 226 lockmgr(&jail_lock, LK_EXCLUSIVE); 227 LIST_REMOVE(pr, pr_list); 228 --prisoncount; 229 lockmgr(&jail_lock, LK_RELEASE); 230 varsymset_clean(&pr->pr_varsymset); 231 nlookup_done(&nd); 232 return (error); 233 } 234 235 /* 236 * jail() 237 * 238 * jail_args(syscallarg(struct jail *) jail) 239 * 240 * MPALMOSTSAFE 241 */ 242 int 243 sys_jail(struct sysmsg *sysmsg, const struct jail_args *uap) 244 { 245 struct prison *pr; 246 struct jail_ip_storage *jip; 247 struct jail j; 248 int error; 249 uint32_t jversion; 250 251 sysmsg->sysmsg_result = -1; 252 253 error = caps_priv_check_self(SYSCAP_NOJAIL_CREATE); 254 if (error) 255 return (error); 256 257 error = copyin(uap->jail, &jversion, sizeof(jversion)); 258 if (error) 259 return (error); 260 261 pr = kmalloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 262 SLIST_INIT(&pr->pr_ips); 263 lockmgr(&jail_lock, 
LK_EXCLUSIVE); 264 265 switch (jversion) { 266 case 0: 267 /* Single IPv4 jails. */ 268 { 269 struct jail_v0 jv0; 270 struct sockaddr_in ip4addr; 271 272 error = copyin(uap->jail, &jv0, sizeof(jv0)); 273 if (error) 274 goto out; 275 276 j.path = jv0.path; 277 j.hostname = jv0.hostname; 278 279 jip = kmalloc(sizeof(*jip), M_PRISON, M_WAITOK | M_ZERO); 280 ip4addr.sin_family = AF_INET; 281 ip4addr.sin_addr.s_addr = htonl(jv0.ip_number); 282 memcpy(&jip->ip, &ip4addr, sizeof(ip4addr)); 283 SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries); 284 break; 285 } 286 287 case 1: 288 /* 289 * DragonFly multi noIP/IPv4/IPv6 jails 290 * 291 * NOTE: This version is unsupported by FreeBSD 292 * (which uses version 2 instead). 293 */ 294 295 error = copyin(uap->jail, &j, sizeof(j)); 296 if (error) 297 goto out; 298 299 for (int i = 0; i < j.n_ips; i++) { 300 jip = kmalloc(sizeof(*jip), M_PRISON, 301 M_WAITOK | M_ZERO); 302 SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries); 303 error = copyin(&j.ips[i], &jip->ip, 304 sizeof(struct sockaddr_storage)); 305 if (error) 306 goto out; 307 } 308 break; 309 default: 310 error = EINVAL; 311 goto out; 312 } 313 314 error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); 315 if (error) 316 goto out; 317 318 /* Use default capabilities as a template */ 319 pr->pr_caps = prison_default_caps; 320 321 error = kern_jail(pr, &j); 322 if (error) 323 goto out; 324 325 sysmsg->sysmsg_result = pr->pr_id; 326 lockmgr(&jail_lock, LK_RELEASE); 327 328 return (0); 329 330 out: 331 /* Delete all ips */ 332 while (!SLIST_EMPTY(&pr->pr_ips)) { 333 jip = SLIST_FIRST(&pr->pr_ips); 334 SLIST_REMOVE_HEAD(&pr->pr_ips, entries); 335 kfree(jip, M_PRISON); 336 } 337 lockmgr(&jail_lock, LK_RELEASE); 338 kfree(pr, M_PRISON); 339 340 return (error); 341 } 342 343 /* 344 * int jail_attach(int jid); 345 * 346 * MPALMOSTSAFE 347 */ 348 int 349 sys_jail_attach(struct sysmsg *sysmsg, const struct jail_attach_args *uap) 350 { 351 int error; 352 353 error = 
caps_priv_check_self(SYSCAP_NOJAIL_ATTACH); 354 if (error) 355 return(error); 356 lockmgr(&jail_lock, LK_EXCLUSIVE); 357 error = kern_jail_attach(uap->jid); 358 lockmgr(&jail_lock, LK_RELEASE); 359 return (error); 360 } 361 362 static void 363 prison_ipcache_init(struct prison *pr) 364 { 365 struct jail_ip_storage *jis; 366 struct sockaddr_in *ip4; 367 struct sockaddr_in6 *ip6; 368 369 lockmgr(&jail_lock, LK_EXCLUSIVE); 370 SLIST_FOREACH(jis, &pr->pr_ips, entries) { 371 switch (jis->ip.ss_family) { 372 case AF_INET: 373 ip4 = (struct sockaddr_in *)&jis->ip; 374 if ((ntohl(ip4->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == 375 IN_LOOPBACKNET) { 376 /* loopback address */ 377 if (pr->local_ip4 == NULL) 378 pr->local_ip4 = ip4; 379 } else { 380 /* public address */ 381 if (pr->nonlocal_ip4 == NULL) 382 pr->nonlocal_ip4 = ip4; 383 } 384 break; 385 386 case AF_INET6: 387 ip6 = (struct sockaddr_in6 *)&jis->ip; 388 if (IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr)) { 389 /* loopback address */ 390 if (pr->local_ip6 == NULL) 391 pr->local_ip6 = ip6; 392 } else { 393 /* public address */ 394 if (pr->nonlocal_ip6 == NULL) 395 pr->nonlocal_ip6 = ip6; 396 } 397 break; 398 } 399 } 400 lockmgr(&jail_lock, LK_RELEASE); 401 } 402 403 /* 404 * Changes INADDR_LOOPBACK for a valid jail address. 405 * ip is in network byte order. 406 * Returns 1 if the ip is among jail valid ips. 407 * Returns 0 if is not among jail valid ips or 408 * if couldn't replace INADDR_LOOPBACK for a valid 409 * IP. 
410 */ 411 int 412 prison_replace_wildcards(struct thread *td, struct sockaddr *ip) 413 { 414 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip; 415 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip; 416 struct prison *pr; 417 418 if (td->td_proc == NULL || td->td_ucred == NULL) 419 return (1); 420 if ((pr = td->td_ucred->cr_prison) == NULL) 421 return (1); 422 423 if ((ip->sa_family == AF_INET && 424 ip4->sin_addr.s_addr == htonl(INADDR_ANY)) || 425 (ip->sa_family == AF_INET6 && 426 IN6_IS_ADDR_UNSPECIFIED(&ip6->sin6_addr))) 427 return (1); 428 if ((ip->sa_family == AF_INET && 429 ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) || 430 (ip->sa_family == AF_INET6 && 431 IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) { 432 if (!prison_get_local(pr, ip->sa_family, ip) && 433 !prison_get_nonlocal(pr, ip->sa_family, ip)) 434 return(0); 435 else 436 return(1); 437 } 438 if (jailed_ip(pr, ip)) 439 return(1); 440 return(0); 441 } 442 443 /* 444 * Convert the localhost IP to the actual jail IP 445 */ 446 int 447 prison_remote_ip(struct thread *td, struct sockaddr *ip) 448 { 449 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip; 450 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip; 451 struct prison *pr; 452 453 if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL) 454 return(1); 455 if ((pr = td->td_ucred->cr_prison) == NULL) 456 return(1); 457 if ((ip->sa_family == AF_INET && 458 ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) || 459 (ip->sa_family == AF_INET6 && 460 IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) { 461 if (!prison_get_local(pr, ip->sa_family, ip) && 462 !prison_get_nonlocal(pr, ip->sa_family, ip)) 463 return(0); 464 else 465 return(1); 466 } 467 return(1); 468 } 469 470 /* 471 * Convert the jail IP back to localhost 472 * 473 * Used by getsockname() and getpeername() to convert the in-jail loopback 474 * address back to LOCALHOST. For example, 127.0.0.2 -> 127.0.0.1. 
The 475 * idea is that programs running inside the jail should be unaware that they 476 * are using a different loopback IP than the host. 477 */ 478 __read_mostly static struct in6_addr sin6_localhost = IN6ADDR_LOOPBACK_INIT; 479 480 int 481 prison_local_ip(struct thread *td, struct sockaddr *ip) 482 { 483 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip; 484 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip; 485 struct prison *pr; 486 487 if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL) 488 return(1); 489 if ((pr = td->td_ucred->cr_prison) == NULL) 490 return(1); 491 if (ip->sa_family == AF_INET && pr->local_ip4 && 492 pr->local_ip4->sin_addr.s_addr == ip4->sin_addr.s_addr && 493 pr->local_ip4->sin_addr.s_addr != htonl(INADDR_LOOPBACK)) { 494 ip4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 495 return(0); 496 } 497 if (ip->sa_family == AF_INET6 && pr->local_ip6 && 498 bcmp(&pr->local_ip6->sin6_addr, &ip6->sin6_addr, 499 sizeof(ip6->sin6_addr)) == 0) { 500 bcopy(&sin6_localhost, &ip6->sin6_addr, sizeof(ip6->sin6_addr)); 501 return(0); 502 } 503 return(1); 504 } 505 506 /* 507 * Prison get non loopback ip: 508 * - af is the address family of the ip we want (AF_INET|AF_INET6). 509 * - If ip != NULL, put the first IP address that is not a loopback address 510 * into *ip. 511 * 512 * ip is in network by order and we don't touch it unless we find a valid ip. 513 * No matter if ip == NULL or not, we return either a valid struct sockaddr *, 514 * or NULL. This struct may not be modified. 
515 */ 516 struct sockaddr * 517 prison_get_nonlocal(struct prison *pr, sa_family_t af, struct sockaddr *ip) 518 { 519 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip; 520 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip; 521 522 /* Check if it is cached */ 523 switch(af) { 524 case AF_INET: 525 if (ip4 != NULL && pr->nonlocal_ip4 != NULL) 526 ip4->sin_addr.s_addr = pr->nonlocal_ip4->sin_addr.s_addr; 527 return (struct sockaddr *)pr->nonlocal_ip4; 528 529 case AF_INET6: 530 if (ip6 != NULL && pr->nonlocal_ip6 != NULL) 531 ip6->sin6_addr = pr->nonlocal_ip6->sin6_addr; 532 return (struct sockaddr *)pr->nonlocal_ip6; 533 } 534 535 /* NOTREACHED */ 536 return NULL; 537 } 538 539 /* 540 * Prison get loopback ip. 541 * - af is the address family of the ip we want (AF_INET|AF_INET6). 542 * - If ip != NULL, put the first IP address that is not a loopback address 543 * into *ip. 544 * 545 * ip is in network by order and we don't touch it unless we find a valid ip. 546 * No matter if ip == NULL or not, we return either a valid struct sockaddr *, 547 * or NULL. This struct may not be modified. 
548 */ 549 struct sockaddr * 550 prison_get_local(struct prison *pr, sa_family_t af, struct sockaddr *ip) 551 { 552 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip; 553 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip; 554 555 /* Check if it is cached */ 556 switch(af) { 557 case AF_INET: 558 if (ip4 != NULL && pr->local_ip4 != NULL) 559 ip4->sin_addr.s_addr = pr->local_ip4->sin_addr.s_addr; 560 return (struct sockaddr *)pr->local_ip4; 561 562 case AF_INET6: 563 if (ip6 != NULL && pr->local_ip6 != NULL) 564 ip6->sin6_addr = pr->local_ip6->sin6_addr; 565 return (struct sockaddr *)pr->local_ip6; 566 } 567 568 /* NOTREACHED */ 569 return NULL; 570 } 571 572 /* Check if the IP is among ours, if it is return 1, else 0 */ 573 int 574 jailed_ip(struct prison *pr, const struct sockaddr *ip) 575 { 576 const struct jail_ip_storage *jis; 577 const struct sockaddr_in *jip4, *ip4; 578 const struct sockaddr_in6 *jip6, *ip6; 579 580 if (pr == NULL) 581 return(0); 582 ip4 = (const struct sockaddr_in *)ip; 583 ip6 = (const struct sockaddr_in6 *)ip; 584 585 lockmgr(&jail_lock, LK_EXCLUSIVE); 586 SLIST_FOREACH(jis, &pr->pr_ips, entries) { 587 switch (ip->sa_family) { 588 case AF_INET: 589 jip4 = (const struct sockaddr_in *) &jis->ip; 590 if (jip4->sin_family == AF_INET && 591 ip4->sin_addr.s_addr == jip4->sin_addr.s_addr) { 592 lockmgr(&jail_lock, LK_RELEASE); 593 return(1); 594 } 595 break; 596 case AF_INET6: 597 jip6 = (const struct sockaddr_in6 *) &jis->ip; 598 if (jip6->sin6_family == AF_INET6 && 599 IN6_ARE_ADDR_EQUAL(&ip6->sin6_addr, 600 &jip6->sin6_addr)) { 601 lockmgr(&jail_lock, LK_RELEASE); 602 return(1); 603 } 604 break; 605 } 606 } 607 lockmgr(&jail_lock, LK_RELEASE); 608 /* Ip not in list */ 609 return(0); 610 } 611 612 int 613 prison_if(struct ucred *cred, struct sockaddr *sa) 614 { 615 struct prison *pr; 616 struct sockaddr_in *sai = (struct sockaddr_in*) sa; 617 618 pr = cred->cr_prison; 619 620 if (((sai->sin_family != AF_INET) && (sai->sin_family != 
AF_INET6)) 621 && PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_NET_UNIXIPROUTE)) 622 return(1); 623 else if ((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6)) 624 return(0); 625 else if (jailed_ip(pr, sa)) 626 return(0); 627 return(1); 628 } 629 630 /* 631 * Returns a prison instance, or NULL on failure. 632 */ 633 static struct prison * 634 prison_find(int prid) 635 { 636 struct prison *pr; 637 638 lockmgr(&jail_lock, LK_EXCLUSIVE); 639 LIST_FOREACH(pr, &allprison, pr_list) { 640 if (pr->pr_id == prid) 641 break; 642 } 643 lockmgr(&jail_lock, LK_RELEASE); 644 645 return(pr); 646 } 647 648 static int 649 sysctl_jail_list(SYSCTL_HANDLER_ARGS) 650 { 651 struct thread *td = curthread; 652 struct jail_ip_storage *jip; 653 #ifdef INET6 654 struct sockaddr_in6 *jsin6; 655 #endif 656 struct sockaddr_in *jsin; 657 struct lwp *lp; 658 struct prison *pr; 659 unsigned int jlssize, jlsused; 660 int count, error; 661 char *jls; /* Jail list */ 662 char *oip; /* Output ip */ 663 char *fullpath, *freepath; 664 665 jlsused = 0; 666 667 if (jailed(td->td_ucred)) 668 return (0); 669 lp = td->td_lwp; 670 retry: 671 count = prisoncount; 672 673 if (count == 0) 674 return(0); 675 676 jlssize = (count * 1024); 677 jls = kmalloc(jlssize + 1, M_TEMP, M_WAITOK | M_ZERO); 678 if (count < prisoncount) { 679 kfree(jls, M_TEMP); 680 goto retry; 681 } 682 count = prisoncount; 683 684 lockmgr(&jail_lock, LK_EXCLUSIVE); 685 LIST_FOREACH(pr, &allprison, pr_list) { 686 error = cache_fullpath(lp->lwp_proc, &pr->pr_root, NULL, 687 &fullpath, &freepath, 0); 688 if (error) 689 continue; 690 if (jlsused && jlsused < jlssize) 691 jls[jlsused++] = '\n'; 692 count = ksnprintf(jls + jlsused, (jlssize - jlsused), 693 "%d %s %s", 694 pr->pr_id, pr->pr_host, fullpath); 695 kfree(freepath, M_TEMP); 696 if (count < 0) 697 goto end; 698 jlsused += count; 699 700 /* Copy the IPS */ 701 SLIST_FOREACH(jip, &pr->pr_ips, entries) { 702 char buf[INET_ADDRSTRLEN]; 703 704 jsin = (struct sockaddr_in *)&jip->ip; 
705 706 switch(jsin->sin_family) { 707 case AF_INET: 708 oip = kinet_ntoa(jsin->sin_addr, buf); 709 break; 710 #ifdef INET6 711 case AF_INET6: 712 jsin6 = (struct sockaddr_in6 *)&jip->ip; 713 oip = ip6_sprintf(&jsin6->sin6_addr); 714 break; 715 #endif 716 default: 717 oip = "?family?"; 718 break; 719 } 720 721 if ((jlssize - jlsused) < (strlen(oip) + 1)) { 722 error = ERANGE; 723 goto end; 724 } 725 count = ksnprintf(jls + jlsused, (jlssize - jlsused), 726 " %s", oip); 727 if (count < 0) 728 goto end; 729 jlsused += count; 730 } 731 } 732 733 /* 734 * The format is: 735 * pr_id <SPC> hostname1 <SPC> PATH1 <SPC> IP1 <SPC> IP2\npr_id... 736 */ 737 error = SYSCTL_OUT(req, jls, jlsused); 738 end: 739 lockmgr(&jail_lock, LK_RELEASE); 740 kfree(jls, M_TEMP); 741 742 return(error); 743 } 744 745 SYSCTL_OID(_jail, OID_AUTO, list, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, 746 sysctl_jail_list, "A", "List of active jails"); 747 748 static int 749 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 750 { 751 int error, injail; 752 753 injail = jailed(req->td->td_ucred); 754 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 755 756 return (error); 757 } 758 759 SYSCTL_PROC(_jail, OID_AUTO, jailed, 760 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NOLOCK, NULL, 0, 761 sysctl_jail_jailed, "I", "Process in jail?"); 762 763 /* 764 * MPSAFE 765 */ 766 void 767 prison_hold(struct prison *pr) 768 { 769 atomic_add_int(&pr->pr_ref, 1); 770 #ifdef PRISON_DEBUG 771 if (prison_debug > 0) { 772 --prison_debug; 773 print_backtrace(-1); 774 } 775 #endif 776 } 777 778 /* 779 * MPALMOSTSAFE 780 */ 781 void 782 prison_free(struct prison *pr) 783 { 784 struct jail_ip_storage *jls; 785 786 #ifdef PRISON_DEBUG 787 if (prison_debug > 0) { 788 --prison_debug; 789 print_backtrace(-1); 790 } 791 #endif 792 KKASSERT(pr->pr_ref > 0); 793 if (atomic_fetchadd_int(&pr->pr_ref, -1) != 1) 794 return; 795 796 /* 797 * The global jail lock is needed on the last ref to adjust 798 * the list. 
799 */ 800 lockmgr(&jail_lock, LK_EXCLUSIVE); 801 if (pr->pr_ref) { 802 lockmgr(&jail_lock, LK_RELEASE); 803 return; 804 } 805 LIST_REMOVE(pr, pr_list); 806 --prisoncount; 807 808 /* 809 * Clean up 810 */ 811 while (!SLIST_EMPTY(&pr->pr_ips)) { 812 jls = SLIST_FIRST(&pr->pr_ips); 813 SLIST_REMOVE_HEAD(&pr->pr_ips, entries); 814 kfree(jls, M_PRISON); 815 } 816 lockmgr(&jail_lock, LK_RELEASE); 817 818 if (pr->pr_linux != NULL) 819 kfree(pr->pr_linux, M_PRISON); 820 varsymset_clean(&pr->pr_varsymset); 821 822 /* Release the sysctl tree */ 823 prison_sysctl_done(pr); 824 825 cache_drop(&pr->pr_root); 826 kfree(pr, M_PRISON); 827 } 828 829 /* 830 * Check if permisson for a specific privilege is granted within jail. 831 * 832 * MPSAFE 833 */ 834 int 835 prison_priv_check(struct ucred *cred, int cap) 836 { 837 struct prison *pr = cred->cr_prison; 838 839 if (!jailed(cred)) 840 return (0); 841 842 switch (cap & ~__SYSCAP_XFLAGS) { 843 case SYSCAP_NOCRED_SETUID: 844 case SYSCAP_NOCRED_SETGID: 845 case SYSCAP_NOCRED_SETEUID: 846 case SYSCAP_NOCRED_SETEGID: 847 case SYSCAP_NOCRED_SETREUID: 848 case SYSCAP_NOCRED_SETREGID: 849 case SYSCAP_NOCRED_SETRESUID: 850 case SYSCAP_NOCRED_SETRESGID: 851 case SYSCAP_NOCRED_SETGROUPS: 852 853 case SYSCAP_NOVFS_SYSFLAGS: 854 case SYSCAP_NOVFS_CHOWN: 855 case SYSCAP_NOVFS_CHMOD: 856 case SYSCAP_NOVFS_CHROOT: 857 case SYSCAP_NOVFS_LINK: 858 case SYSCAP_NOVFS_CHFLAGS_DEV: 859 case SYSCAP_NOVFS_REVOKE: 860 case SYSCAP_NOVFS_MKNOD_BAD: 861 case SYSCAP_NOVFS_MKNOD_WHT: 862 case SYSCAP_NOVFS_MKNOD_DIR: 863 return (0); 864 865 case SYSCAP_NOMOUNT_NULLFS: 866 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_NULLFS)) 867 return (0); 868 else 869 return (EPERM); 870 case SYSCAP_NOMOUNT_DEVFS: 871 return (EPERM); 872 case SYSCAP_NOMOUNT_TMPFS: 873 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_VFS_MOUNT_TMPFS)) 874 return (0); 875 else 876 return (EPERM); 877 878 case SYSCAP_NOVFS_SETATTR: 879 case SYSCAP_NOVFS_SETGID: 880 881 case 
SYSCAP_NOPROC_SETRLIMIT: 882 case SYSCAP_NOPROC_SETLOGIN: 883 884 case SYSCAP_NOSYSCTL_WR: 885 886 case SYSCAP_NOVARSYM_SYS: 887 888 case SYSCAP_NOSETHOSTNAME: 889 890 case SYSCAP_NOPROC_TRESPASS: 891 return (0); 892 893 case SYSCAP_NOQUOTA_WR: 894 return (0); 895 896 case SYSCAP_NODEBUG_UNPRIV: 897 return (0); 898 899 /* 900 * Allow jailed root to bind reserved ports. 901 */ 902 case SYSCAP_NONET_RESPORT: 903 return (0); 904 905 906 /* 907 * Conditionally allow creating raw sockets in jail. 908 */ 909 case SYSCAP_NONET_RAW: 910 if (PRISON_CAP_ISSET(pr->pr_caps, 911 PRISON_CAP_NET_RAW_SOCKETS)) 912 return (0); 913 else 914 return (EPERM); 915 916 case SYSCAP_NOVFS_IOCTL: 917 return (0); 918 919 default: 920 921 return (EPERM); 922 } 923 } 924 925 926 /* 927 * Create a per-jail sysctl tree to control the prison 928 */ 929 int 930 prison_sysctl_create(struct prison *pr) 931 { 932 char id_str[7]; 933 934 ksnprintf(id_str, 6, "%d", pr->pr_id); 935 936 pr->pr_sysctl_ctx = (struct sysctl_ctx_list *) kmalloc( 937 sizeof(struct sysctl_ctx_list), M_PRISON, M_WAITOK | M_ZERO); 938 939 sysctl_ctx_init(pr->pr_sysctl_ctx); 940 941 /* Main jail node */ 942 pr->pr_sysctl_tree = SYSCTL_ADD_NODE(pr->pr_sysctl_ctx, 943 SYSCTL_STATIC_CHILDREN(_jail), 944 OID_AUTO, id_str, CTLFLAG_RD, 0, 945 "Jail specific settings"); 946 947 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 948 OID_AUTO, "sys_set_hostname", CTLFLAG_RW, 949 &pr->pr_caps, 0, PRISON_CAP_SYS_SET_HOSTNAME, 950 "Processes in jail can set their hostnames"); 951 952 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 953 OID_AUTO, "sys_sysvipc", CTLFLAG_RW, 954 &pr->pr_caps, 0, PRISON_CAP_SYS_SYSVIPC, 955 "Processes in jail can use System V IPC primitives"); 956 957 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 958 OID_AUTO, "net_unixiproute", CTLFLAG_RW, 959 &pr->pr_caps, 0, PRISON_CAP_NET_UNIXIPROUTE, 960 "Processes in jail are limited to creating 
UNIX/IPv[46]/route sockets only"); 961 962 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 963 OID_AUTO, "net_raw_sockets", CTLFLAG_RW, 964 &pr->pr_caps, 0, PRISON_CAP_NET_RAW_SOCKETS, 965 "Process in jail can create raw sockets"); 966 967 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 968 OID_AUTO, "allow_listen_override", CTLFLAG_RW, 969 &pr->pr_caps, 0, PRISON_CAP_NET_LISTEN_OVERRIDE, 970 "Process in jail can create raw sockets"); 971 972 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 973 OID_AUTO, "vfs_chflags", CTLFLAG_RW, 974 &pr->pr_caps, 0, PRISON_CAP_VFS_CHFLAGS, 975 "Process in jail can override host wildcard listen"); 976 977 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 978 OID_AUTO, "vfs_mount_nullfs", CTLFLAG_RW, 979 &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_NULLFS, 980 "Processes in jail can mount nullfs(5) filesystems"); 981 982 SYSCTL_ADD_BIT64(pr->pr_sysctl_ctx, SYSCTL_CHILDREN(pr->pr_sysctl_tree), 983 OID_AUTO, "vfs_mount_tmpfs", CTLFLAG_RW, 984 &pr->pr_caps, 0, PRISON_CAP_VFS_MOUNT_TMPFS, 985 "Processes in jail can mount tmpfs(5) filesystems"); 986 987 return 0; 988 } 989 990 int 991 prison_sysctl_done(struct prison *pr) 992 { 993 if (pr->pr_sysctl_tree) { 994 sysctl_ctx_free(pr->pr_sysctl_ctx); 995 kfree(pr->pr_sysctl_ctx, M_PRISON); 996 pr->pr_sysctl_tree = NULL; 997 } 998 999 return 0; 1000 } 1001