10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*2677Sml93401 * Common Development and Distribution License (the "License"). 6*2677Sml93401 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*2677Sml93401 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 270Sstevel@tonic-gate /* All Rights Reserved */ 280Sstevel@tonic-gate 290Sstevel@tonic-gate 300Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 310Sstevel@tonic-gate 320Sstevel@tonic-gate /* 330Sstevel@tonic-gate * Common Inter-Process Communication routines. 
 *
 * Overview
 * --------
 *
 * The System V inter-process communication (IPC) facilities provide
 * three services, message queues, semaphore arrays, and shared memory
 * segments, which are managed using filesystem-like namespaces.
 * Unlike a filesystem, these namespaces aren't mounted and accessible
 * via a path -- a special API is used to interact with the different
 * facilities (nothing precludes a VFS-based interface, but the
 * standards require the special APIs). Furthermore, these special
 * APIs don't use file descriptors, nor do they have an equivalent.
 * This means that every operation which acts on an object needs to
 * perform the equivalent of a lookup, which in turn means that every
 * operation can fail if the specified object doesn't exist in the
 * facility's namespace.
 *
 * Objects
 * -------
 *
 * Each object in a namespace has a unique ID, which is assigned by the
 * system and is used to identify the object when performing operations
 * on it. An object can also have a key, which is selected by the user
 * at allocation time and is used as a primitive rendezvous mechanism.
 * An object without a key is said to have a "private" key.
 *
 * To perform an operation on an object given its key, one must first
 * perform a lookup and obtain its ID. The ID is then used to identify
 * the object when performing the operation.
If the object has a 630Sstevel@tonic-gate * private key, the ID must be known or obtained by other means. 640Sstevel@tonic-gate * 650Sstevel@tonic-gate * Each object in the namespace has a creator uid and gid, as well as 660Sstevel@tonic-gate * an owner uid and gid. Both are initialized with the ruid and rgid 670Sstevel@tonic-gate * of the process which created the object. The creator or current 680Sstevel@tonic-gate * owner has the ability to change the owner of the object. 690Sstevel@tonic-gate * 700Sstevel@tonic-gate * Each object in the namespace has a set of file-like permissions, 710Sstevel@tonic-gate * which, in conjunction with the creator and owner uid and gid, 720Sstevel@tonic-gate * control read and write access to the object (execute is ignored). 730Sstevel@tonic-gate * 74*2677Sml93401 * Each object also has a creator project and zone, which are used to 75*2677Sml93401 * account for its resource usage. 760Sstevel@tonic-gate * 770Sstevel@tonic-gate * Operations 780Sstevel@tonic-gate * ---------- 790Sstevel@tonic-gate * 800Sstevel@tonic-gate * There are five operations which all three facilities have in 810Sstevel@tonic-gate * common: GET, SET, STAT, RMID, and IDS. 820Sstevel@tonic-gate * 830Sstevel@tonic-gate * GET, like open, is used to allocate a new object or obtain an 840Sstevel@tonic-gate * existing one (using its key). It takes a key, a set of flags and 850Sstevel@tonic-gate * mode bits, and optionally facility-specific arguments. If the key 860Sstevel@tonic-gate * is IPC_PRIVATE, a new object with the requested mode bits and 870Sstevel@tonic-gate * facility-specific attributes is created. If the key isn't 880Sstevel@tonic-gate * IPC_PRIVATE, the GET will attempt to look up the specified key and 890Sstevel@tonic-gate * either return that or create a new key depending on the state of the 900Sstevel@tonic-gate * IPC_CREAT and IPC_EXCL flags, much like open. 
 * If GET needs to
 * allocate an object, it can fail if there is insufficient space in
 * the namespace (the maximum number of ids for the facility has been
 * exceeded) or if the facility-specific initialization fails. If GET
 * finds an object it can return, it can still fail if that object's
 * permissions or facility-specific attributes are less than those
 * requested.
 *
 * SET is used to adjust facility-specific parameters of an object, in
 * addition to the owner uid and gid, and mode bits. It can fail if
 * the caller isn't the creator or owner.
 *
 * STAT is used to obtain information about an object including the
 * general attributes described above as well as facility-specific
 * information. It can fail if the caller doesn't have read
 * permission.
 *
 * RMID removes an object from the namespace. Subsequent operations
 * using the object's ID or key will fail (until another object is
 * created with the same key or ID). Since an RMID may be performed
 * asynchronously with other operations, it is possible that other
 * threads and/or processes will have references to the object. While
 * a facility may have actions which need to be performed at RMID time,
 * only when all references are dropped can the object be destroyed.
 * RMID will fail if the caller isn't the creator or owner.
 *
 * IDS obtains a list of all IDs in a facility's namespace.
There are 1170Sstevel@tonic-gate * no facility-specific behaviors of IDS. 1180Sstevel@tonic-gate * 1190Sstevel@tonic-gate * Design 1200Sstevel@tonic-gate * ------ 1210Sstevel@tonic-gate * 1220Sstevel@tonic-gate * Because some IPC facilities provide services whose operations must 1230Sstevel@tonic-gate * scale, a mechanism which allows fast, concurrent access to 1240Sstevel@tonic-gate * individual objects is needed. Of primary importance is object 1250Sstevel@tonic-gate * lookup based on ID (SET, STAT, others). Allocation (GET), 1260Sstevel@tonic-gate * deallocation (RMID), ID enumeration (IDS), and key lookups (GET) are 1270Sstevel@tonic-gate * lesser concerns, but should be implemented in such a way that ID 1280Sstevel@tonic-gate * lookup isn't affected (at least not in the common case). 1290Sstevel@tonic-gate * 1300Sstevel@tonic-gate * Starting from the bottom up, each object is represented by a 1310Sstevel@tonic-gate * structure, the first member of which must be a kipc_perm_t. The 1320Sstevel@tonic-gate * kipc_perm_t contains the information described above in "Objects", a 1330Sstevel@tonic-gate * reference count (since the object may continue to exist after it has 1340Sstevel@tonic-gate * been removed from the namespace), as well as some additional 1350Sstevel@tonic-gate * metadata used to manage data structure membership. These objects 1360Sstevel@tonic-gate * are dynamically allocated. 1370Sstevel@tonic-gate * 1380Sstevel@tonic-gate * Above the objects is a power-of-two sized table of ID slots. Each 1390Sstevel@tonic-gate * slot contains a pointer to an object, a sequence number, and a 1400Sstevel@tonic-gate * lock. An object's ID is a function of its slot's index in the table 1410Sstevel@tonic-gate * and its slot's sequence number. Every time a slot is released (via 1420Sstevel@tonic-gate * RMID) its sequence number is increased. Strictly speaking, the 1430Sstevel@tonic-gate * sequence number is unnecessary. 
However, checking the sequence 1440Sstevel@tonic-gate * number after a lookup provides a certain degree of robustness 1450Sstevel@tonic-gate * against the use of stale IDs (useful since nothing else does). When 1460Sstevel@tonic-gate * the table fills up, it is resized (see Locking, below). 1470Sstevel@tonic-gate * 1480Sstevel@tonic-gate * Of an ID's 31 bits (an ID is, as defined by the standards, a signed 1490Sstevel@tonic-gate * int) the top IPC_SEQ_BITS are used for the sequence number with the 1500Sstevel@tonic-gate * remainder holding the index into the table. The size of the table 1510Sstevel@tonic-gate * is therefore bounded at 2 ^ (31 - IPC_SEQ_BITS) slots. 1520Sstevel@tonic-gate * 1530Sstevel@tonic-gate * Managing this table is the ipc_service structure. It contains a 1540Sstevel@tonic-gate * pointer to the dynamically allocated ID table, a namespace-global 1550Sstevel@tonic-gate * lock, an id_space for managing the free space in the table, and 1560Sstevel@tonic-gate * sundry other metadata necessary for the maintenance of the 1570Sstevel@tonic-gate * namespace. An AVL tree of all keyed objects in the table (sorted by 1580Sstevel@tonic-gate * key) is used for key lookups. An unordered doubly linked list of 1590Sstevel@tonic-gate * all objects in the namespace (keyed or not) is maintained to 1600Sstevel@tonic-gate * facilitate ID enumeration. 
1610Sstevel@tonic-gate * 1620Sstevel@tonic-gate * To help visualize these relationships, here's a picture of a 1630Sstevel@tonic-gate * namespace with a table of size 8 containing three objects 1640Sstevel@tonic-gate * (IPC_SEQ_BITS = 28): 1650Sstevel@tonic-gate * 1660Sstevel@tonic-gate * 1670Sstevel@tonic-gate * +-ipc_service_t--+ 1680Sstevel@tonic-gate * | table *---\ 1690Sstevel@tonic-gate * | keys *---+----------------------\ 1700Sstevel@tonic-gate * | all ids *--\| | 1710Sstevel@tonic-gate * | | || | 1720Sstevel@tonic-gate * +----------------+ || | 1730Sstevel@tonic-gate * || | 1740Sstevel@tonic-gate * /-------------------/| | 1750Sstevel@tonic-gate * | /---------------/ | 1760Sstevel@tonic-gate * | | | 1770Sstevel@tonic-gate * | v | 1780Sstevel@tonic-gate * | +-0------+-1------+-2------+-3------+-4--+---+-5------+-6------+-7------+ 1790Sstevel@tonic-gate * | | Seq=3 | | | Seq=1 | : | | | Seq=6 | 1800Sstevel@tonic-gate * | | | | | | : | | | | 1810Sstevel@tonic-gate * | +-*------+--------+--------+-*------+----+---+--------+--------+-*------+ 1820Sstevel@tonic-gate * | | | | | 1830Sstevel@tonic-gate * | | /---/ | /----------------/ 1840Sstevel@tonic-gate * | | | | | 1850Sstevel@tonic-gate * | v v | v 1860Sstevel@tonic-gate * | +-kipc_perm_t-+ +-kipc_perm_t-+ | +-kipc_perm_t-+ 1870Sstevel@tonic-gate * | | id=0x30 | | id=0x13 | | | id=0x67 | 1880Sstevel@tonic-gate * | | key=0xfeed | | key=0xbeef | | | key=0xcafe | 1890Sstevel@tonic-gate * \->| [list] |<------>| [list] |<------>| [list] | 1900Sstevel@tonic-gate * /->| [avl left] x /--->| [avl left] x \--->| [avl left] *---\ 1910Sstevel@tonic-gate * | | [avl right] x | | [avl right] x | [avl right] *---+-\ 1920Sstevel@tonic-gate * | | | | | | | | | | 1930Sstevel@tonic-gate * | +-------------+ | +-------------+ +-------------+ | | 1940Sstevel@tonic-gate * | \---------------------------------------------/ | 1950Sstevel@tonic-gate * \--------------------------------------------------------------------/ 
1960Sstevel@tonic-gate * 1970Sstevel@tonic-gate * Locking 1980Sstevel@tonic-gate * ------- 1990Sstevel@tonic-gate * 2000Sstevel@tonic-gate * There are three locks (or sets of locks) which are used to ensure 2010Sstevel@tonic-gate * correctness: the slot locks, the namespace lock, and p_lock (needed 2020Sstevel@tonic-gate * when checking resource controls). Their ordering is 2030Sstevel@tonic-gate * 2040Sstevel@tonic-gate * namespace lock -> slot lock 0 -> ... -> slot lock t -> p_lock 2050Sstevel@tonic-gate * 2060Sstevel@tonic-gate * Generally speaking, the namespace lock is used to protect allocation 2070Sstevel@tonic-gate * and removal from the namespace, ID enumeration, and resizing the ID 2080Sstevel@tonic-gate * table. Specifically: 2090Sstevel@tonic-gate * 2100Sstevel@tonic-gate * - write access to all fields of the ipc_service structure 2110Sstevel@tonic-gate * - read access to all variable fields of ipc_service except 2120Sstevel@tonic-gate * ipcs_tabsz (table size) and ipcs_table (the table pointer) 2130Sstevel@tonic-gate * - read/write access to ipc_avl, ipc_list in visible objects' 2140Sstevel@tonic-gate * kipc_perm structures (i.e. objects which have been removed from 2150Sstevel@tonic-gate * the namespace don't have this restriction) 2160Sstevel@tonic-gate * - write access to ipct_seq and ipct_data in the table entries 2170Sstevel@tonic-gate * 2180Sstevel@tonic-gate * A slot lock by itself is meaningless (except when resizing). Of 2190Sstevel@tonic-gate * greater interest conceptually is the notion of an ID lock -- a 2200Sstevel@tonic-gate * "virtual lock" which refers to whichever slot lock an object's ID 2210Sstevel@tonic-gate * currently hashes to. 2220Sstevel@tonic-gate * 2230Sstevel@tonic-gate * An ID lock protects all objects with that ID. Normally there will 2240Sstevel@tonic-gate * only be one such object: the one pointed to by the locked slot. 
2250Sstevel@tonic-gate * However, if an object is removed from the namespace but retains 2260Sstevel@tonic-gate * references (e.g. an attached shared memory segment which has been 2270Sstevel@tonic-gate * RMIDed), it continues to use the lock associated with its original 2280Sstevel@tonic-gate * ID. While this can result in increased contention, operations which 2290Sstevel@tonic-gate * require taking the ID lock of removed objects are infrequent. 2300Sstevel@tonic-gate * 2310Sstevel@tonic-gate * Specifically, an ID lock protects the contents of an object's 2320Sstevel@tonic-gate * structure, including the contents of the embedded kipc_perm 2330Sstevel@tonic-gate * structure (but excluding those fields protected by the namespace 2340Sstevel@tonic-gate * lock). It also protects the ipct_seq and ipct_data fields in its 2350Sstevel@tonic-gate * slot (it is really a slot lock, after all). 2360Sstevel@tonic-gate * 2370Sstevel@tonic-gate * Recall that the table is resizable. To avoid requiring every ID 2380Sstevel@tonic-gate * lookup to take a global lock, a scheme much like that employed for 2390Sstevel@tonic-gate * file descriptors (see the comment above UF_ENTER in user.h) is 2400Sstevel@tonic-gate * used. Note that the sequence number and data pointer are protected 2410Sstevel@tonic-gate * by both the namespace lock and their slot lock. When the table is 2420Sstevel@tonic-gate * resized, the following operations take place: 2430Sstevel@tonic-gate * 2440Sstevel@tonic-gate * 1) A new table is allocated. 2450Sstevel@tonic-gate * 2) The global lock is taken. 2460Sstevel@tonic-gate * 3) All old slots are locked, in order. 2470Sstevel@tonic-gate * 4) The first half of the new slots are locked. 2480Sstevel@tonic-gate * 5) All table entries are copied to the new table, and cleared from 2490Sstevel@tonic-gate * the old table. 2500Sstevel@tonic-gate * 6) The ipc_service structure is updated to point to the new table. 
 *   7) The ipc_service structure is updated with the new table size.
 *   8) All slot locks (old and new) are dropped.
 *
 * Because the slot locks are embedded in the table, ID lookups and
 * other operations which require taking a slot lock need to verify
 * that the lock taken wasn't part of a stale table. This is
 * accomplished by checking the table size before and after
 * dereferencing the table pointer and taking the lock: if the size
 * changes, the lock must be dropped and reacquired. It is this
 * additional work which distinguishes an ID lock from a slot lock.
 *
 * Because we can't guarantee that threads aren't accessing the old
 * tables' locks, they are never deallocated. To prevent spurious
 * reports of memory leaks, a pointer to the discarded table is stored
 * in the new one in step 5. (Theoretically ipcs_destroy will delete
 * the discarded tables, but it is only ever called from a failed _init
 * invocation; i.e. when there aren't any.)
 *
 * Interfaces
 * ----------
 *
 * The following interfaces are provided by the ipc module for use by
 * the individual IPC facilities:
 *
 * ipcperm_access
 *
 *   Given an object and a cred structure, determines if the requested
 *   access type is allowed.
 *
 * ipcperm_set, ipcperm_stat,
 * ipcperm_set64, ipcperm_stat64
 *
 *   Performs the common portion of a STAT or SET operation. All
 *   (except stat and stat64) can fail, so they should be called before
 *   any facility-specific non-reversible changes are made to an
 *   object. Similarly, the set operations have side effects, so they
 *   should only be called once the possibility of a facility-specific
 *   failure is eliminated.
 *
 * ipcs_create
 *
 *   Creates an IPC namespace for use by an IPC facility.
 *
 * ipcs_destroy
 *
 *   Destroys an IPC namespace.
 *
 * ipcs_lock, ipcs_unlock
 *
 *   Takes the namespace lock. Ideally such access wouldn't be
 *   necessary, but there may be facility-specific data protected by
 *   this lock (e.g. project-wide resource consumption).
 *
 * ipc_lock
 *
 *   Takes the lock associated with an ID. Can't fail.
 *
 * ipc_relock
 *
 *   Like ipc_lock, but takes a pointer to a held lock. Drops the lock
 *   unless it is the one that would have been returned by ipc_lock.
 *   Used after calls to cv_wait.
 *
 * ipc_lookup
 *
 *   Performs an ID lookup, returns with the ID lock held.
Fails if 3170Sstevel@tonic-gate * the ID doesn't exist in the namespace. 3180Sstevel@tonic-gate * 3190Sstevel@tonic-gate * ipc_hold 3200Sstevel@tonic-gate * 3210Sstevel@tonic-gate * Takes a reference on an object. 3220Sstevel@tonic-gate * 3230Sstevel@tonic-gate * ipc_rele 3240Sstevel@tonic-gate * 3250Sstevel@tonic-gate * Releases a reference on an object, and drops the object's lock. 3260Sstevel@tonic-gate * Calls the object's destructor if last reference is being 3270Sstevel@tonic-gate * released. 3280Sstevel@tonic-gate * 3290Sstevel@tonic-gate * ipc_rele_locked 3300Sstevel@tonic-gate * 3310Sstevel@tonic-gate * Releases a reference on an object. Doesn't drop lock, and may 3320Sstevel@tonic-gate * only be called when there is more than one reference to the 3330Sstevel@tonic-gate * object. 3340Sstevel@tonic-gate * 3350Sstevel@tonic-gate * ipc_get, ipc_commit_begin, ipc_commit_end, ipc_cleanup 3360Sstevel@tonic-gate * 3370Sstevel@tonic-gate * Components of a GET operation. ipc_get performs a key lookup, 3380Sstevel@tonic-gate * allocating an object if the key isn't found (returning with the 3390Sstevel@tonic-gate * namespace lock and p_lock held), and returning the existing object 3400Sstevel@tonic-gate * if it is (with the object lock held). ipc_get doesn't modify the 3410Sstevel@tonic-gate * namespace. 3420Sstevel@tonic-gate * 3430Sstevel@tonic-gate * ipc_commit_begin begins the process of inserting an object 3440Sstevel@tonic-gate * allocated by ipc_get into the namespace, and can fail. If 3450Sstevel@tonic-gate * successful, it returns with the namespace lock and p_lock held. 3460Sstevel@tonic-gate * ipc_commit_end completes the process of inserting an object into 3470Sstevel@tonic-gate * the namespace and can't fail. The facility can call ipc_cleanup 3480Sstevel@tonic-gate * at any time following a successful ipc_get and before 3490Sstevel@tonic-gate * ipc_commit_end or a failed ipc_commit_begin to fail the 3500Sstevel@tonic-gate * allocation. 
Pseudocode for the suggested GET implementation: 3510Sstevel@tonic-gate * 3520Sstevel@tonic-gate * top: 3530Sstevel@tonic-gate * 3540Sstevel@tonic-gate * ipc_get 3550Sstevel@tonic-gate * 3560Sstevel@tonic-gate * if failure 3570Sstevel@tonic-gate * return 3580Sstevel@tonic-gate * 3590Sstevel@tonic-gate * if found { 3600Sstevel@tonic-gate * 3610Sstevel@tonic-gate * if object meets criteria 3620Sstevel@tonic-gate * unlock object and return success 3630Sstevel@tonic-gate * else 3640Sstevel@tonic-gate * unlock object and return failure 3650Sstevel@tonic-gate * 3660Sstevel@tonic-gate * } else { 3670Sstevel@tonic-gate * 3680Sstevel@tonic-gate * perform resource control tests 3690Sstevel@tonic-gate * drop namespace lock, p_lock 3700Sstevel@tonic-gate * if failure 3710Sstevel@tonic-gate * ipc_cleanup 3720Sstevel@tonic-gate * 3730Sstevel@tonic-gate * perform facility-specific initialization 3740Sstevel@tonic-gate * if failure { 3750Sstevel@tonic-gate * facility-specific cleanup 3760Sstevel@tonic-gate * ipc_cleanup 3770Sstevel@tonic-gate * } 3780Sstevel@tonic-gate * 3790Sstevel@tonic-gate * ( At this point the object should be destructible using the 3800Sstevel@tonic-gate * destructor given to ipcs_create ) 3810Sstevel@tonic-gate * 3820Sstevel@tonic-gate * ipc_commit_begin 3830Sstevel@tonic-gate * if retry 3840Sstevel@tonic-gate * goto top 3850Sstevel@tonic-gate * else if failure 3860Sstevel@tonic-gate * return 3870Sstevel@tonic-gate * 3880Sstevel@tonic-gate * perform facility-specific resource control tests/allocations 3890Sstevel@tonic-gate * if failure 3900Sstevel@tonic-gate * ipc_cleanup 3910Sstevel@tonic-gate * 3920Sstevel@tonic-gate * ipc_commit_end 3930Sstevel@tonic-gate * perform any infallible post-creation actions, unlock, and return 3940Sstevel@tonic-gate * 3950Sstevel@tonic-gate * } 3960Sstevel@tonic-gate * 3970Sstevel@tonic-gate * ipc_rmid 3980Sstevel@tonic-gate * 3990Sstevel@tonic-gate * Performs the common portion of an RMID operation -- looks up an ID 
4000Sstevel@tonic-gate * removes it, and calls the a facility-specific function to do 4010Sstevel@tonic-gate * RMID-time cleanup on the private portions of the object. 4020Sstevel@tonic-gate * 4030Sstevel@tonic-gate * ipc_ids 4040Sstevel@tonic-gate * 4050Sstevel@tonic-gate * Performs the common portion of an IDS operation. 4060Sstevel@tonic-gate * 4070Sstevel@tonic-gate */ 4080Sstevel@tonic-gate 4090Sstevel@tonic-gate #include <sys/types.h> 4100Sstevel@tonic-gate #include <sys/param.h> 4110Sstevel@tonic-gate #include <sys/cred.h> 4120Sstevel@tonic-gate #include <sys/policy.h> 4130Sstevel@tonic-gate #include <sys/proc.h> 4140Sstevel@tonic-gate #include <sys/user.h> 4150Sstevel@tonic-gate #include <sys/ipc.h> 4160Sstevel@tonic-gate #include <sys/ipc_impl.h> 4170Sstevel@tonic-gate #include <sys/errno.h> 4180Sstevel@tonic-gate #include <sys/systm.h> 4190Sstevel@tonic-gate #include <sys/list.h> 4200Sstevel@tonic-gate #include <sys/atomic.h> 4210Sstevel@tonic-gate #include <sys/zone.h> 4220Sstevel@tonic-gate #include <sys/task.h> 4230Sstevel@tonic-gate #include <sys/modctl.h> 4240Sstevel@tonic-gate 4250Sstevel@tonic-gate #include <c2/audit.h> 4260Sstevel@tonic-gate 4270Sstevel@tonic-gate static struct modlmisc modlmisc = { 4280Sstevel@tonic-gate &mod_miscops, 4290Sstevel@tonic-gate "common ipc code", 4300Sstevel@tonic-gate }; 4310Sstevel@tonic-gate 4320Sstevel@tonic-gate static struct modlinkage modlinkage = { 4330Sstevel@tonic-gate MODREV_1, (void *)&modlmisc, NULL 4340Sstevel@tonic-gate }; 4350Sstevel@tonic-gate 4360Sstevel@tonic-gate 4370Sstevel@tonic-gate int 4380Sstevel@tonic-gate _init(void) 4390Sstevel@tonic-gate { 4400Sstevel@tonic-gate return (mod_install(&modlinkage)); 4410Sstevel@tonic-gate } 4420Sstevel@tonic-gate 4430Sstevel@tonic-gate int 4440Sstevel@tonic-gate _fini(void) 4450Sstevel@tonic-gate { 4460Sstevel@tonic-gate return (mod_remove(&modlinkage)); 4470Sstevel@tonic-gate } 4480Sstevel@tonic-gate 4490Sstevel@tonic-gate int 4500Sstevel@tonic-gate 
_info(struct modinfo *modinfop) 4510Sstevel@tonic-gate { 4520Sstevel@tonic-gate return (mod_info(&modlinkage, modinfop)); 4530Sstevel@tonic-gate } 4540Sstevel@tonic-gate 4550Sstevel@tonic-gate 4560Sstevel@tonic-gate /* 4570Sstevel@tonic-gate * Check message, semaphore, or shared memory access permissions. 4580Sstevel@tonic-gate * 4590Sstevel@tonic-gate * This routine verifies the requested access permission for the current 4600Sstevel@tonic-gate * process. The zone ids are compared, and the appropriate bits are 4610Sstevel@tonic-gate * checked corresponding to owner, group (including the list of 4620Sstevel@tonic-gate * supplementary groups), or everyone. Zero is returned on success. 4630Sstevel@tonic-gate * On failure, the security policy is asked to check to override the 4640Sstevel@tonic-gate * permissions check; the policy will either return 0 for access granted 4650Sstevel@tonic-gate * or EACCES. 4660Sstevel@tonic-gate * 4670Sstevel@tonic-gate * Access to objects in other zones requires that the caller be in the 4680Sstevel@tonic-gate * global zone and have the appropriate IPC_DAC_* privilege, regardless 4690Sstevel@tonic-gate * of whether the uid or gid match those of the object. Note that 4700Sstevel@tonic-gate * cross-zone accesses will normally never get here since they'll 4710Sstevel@tonic-gate * fail in ipc_lookup or ipc_get. 
4720Sstevel@tonic-gate * 4730Sstevel@tonic-gate * The arguments must be set up as follows: 4740Sstevel@tonic-gate * p - Pointer to permission structure to verify 4750Sstevel@tonic-gate * mode - Desired access permissions 4760Sstevel@tonic-gate */ 4770Sstevel@tonic-gate int 4780Sstevel@tonic-gate ipcperm_access(kipc_perm_t *p, int mode, cred_t *cr) 4790Sstevel@tonic-gate { 4800Sstevel@tonic-gate int shifts = 0; 4810Sstevel@tonic-gate uid_t uid = crgetuid(cr); 4820Sstevel@tonic-gate zoneid_t zoneid = getzoneid(); 4830Sstevel@tonic-gate 4840Sstevel@tonic-gate if (p->ipc_zoneid == zoneid) { 4850Sstevel@tonic-gate if (uid != p->ipc_uid && uid != p->ipc_cuid) { 4860Sstevel@tonic-gate shifts += 3; 4870Sstevel@tonic-gate if (!groupmember(p->ipc_gid, cr) && 4880Sstevel@tonic-gate !groupmember(p->ipc_cgid, cr)) 4890Sstevel@tonic-gate shifts += 3; 4900Sstevel@tonic-gate } 4910Sstevel@tonic-gate 4920Sstevel@tonic-gate mode &= ~(p->ipc_mode << shifts); 4930Sstevel@tonic-gate 4940Sstevel@tonic-gate if (mode == 0) 4950Sstevel@tonic-gate return (0); 4960Sstevel@tonic-gate } else if (zoneid != GLOBAL_ZONEID) 4970Sstevel@tonic-gate return (EACCES); 4980Sstevel@tonic-gate 4990Sstevel@tonic-gate return (secpolicy_ipc_access(cr, p, mode)); 5000Sstevel@tonic-gate } 5010Sstevel@tonic-gate 5020Sstevel@tonic-gate /* 5030Sstevel@tonic-gate * There are two versions of the ipcperm_set/stat functions: 5040Sstevel@tonic-gate * ipcperm_??? - for use with IPC_SET/STAT 5050Sstevel@tonic-gate * ipcperm_???_64 - for use with IPC_SET64/STAT64 5060Sstevel@tonic-gate * 5070Sstevel@tonic-gate * These functions encapsulate the common portions (copying, permission 5080Sstevel@tonic-gate * checks, and auditing) of the set/stat operations. All, except for 5090Sstevel@tonic-gate * stat and stat_64 which are void, return 0 on success or a non-zero 5100Sstevel@tonic-gate * errno value on error. 
5110Sstevel@tonic-gate */ 5120Sstevel@tonic-gate 5130Sstevel@tonic-gate int 5140Sstevel@tonic-gate ipcperm_set(ipc_service_t *service, struct cred *cr, 5150Sstevel@tonic-gate kipc_perm_t *kperm, struct ipc_perm *perm, model_t model) 5160Sstevel@tonic-gate { 5170Sstevel@tonic-gate STRUCT_HANDLE(ipc_perm, lperm); 5180Sstevel@tonic-gate uid_t uid; 5190Sstevel@tonic-gate gid_t gid; 5200Sstevel@tonic-gate mode_t mode; 5210Sstevel@tonic-gate 5220Sstevel@tonic-gate ASSERT(IPC_LOCKED(service, kperm)); 5230Sstevel@tonic-gate 5240Sstevel@tonic-gate STRUCT_SET_HANDLE(lperm, model, perm); 5250Sstevel@tonic-gate uid = STRUCT_FGET(lperm, uid); 5260Sstevel@tonic-gate gid = STRUCT_FGET(lperm, gid); 5270Sstevel@tonic-gate mode = STRUCT_FGET(lperm, mode); 5280Sstevel@tonic-gate 5290Sstevel@tonic-gate if (secpolicy_ipc_owner(cr, kperm) != 0) 5300Sstevel@tonic-gate return (EPERM); 5310Sstevel@tonic-gate 5320Sstevel@tonic-gate if ((uid < 0) || (uid > MAXUID) || (gid < 0) || (gid > MAXUID)) 5330Sstevel@tonic-gate return (EINVAL); 5340Sstevel@tonic-gate 5350Sstevel@tonic-gate kperm->ipc_uid = uid; 5360Sstevel@tonic-gate kperm->ipc_gid = gid; 5370Sstevel@tonic-gate kperm->ipc_mode = (mode & 0777) | (kperm->ipc_mode & ~0777); 5380Sstevel@tonic-gate 5390Sstevel@tonic-gate #ifdef C2_AUDIT 5400Sstevel@tonic-gate if (audit_active) 5410Sstevel@tonic-gate audit_ipcget(service->ipcs_atype, kperm); 5420Sstevel@tonic-gate #endif 5430Sstevel@tonic-gate 5440Sstevel@tonic-gate return (0); 5450Sstevel@tonic-gate } 5460Sstevel@tonic-gate 5470Sstevel@tonic-gate void 5480Sstevel@tonic-gate ipcperm_stat(struct ipc_perm *perm, kipc_perm_t *kperm, model_t model) 5490Sstevel@tonic-gate { 5500Sstevel@tonic-gate STRUCT_HANDLE(ipc_perm, lperm); 5510Sstevel@tonic-gate 5520Sstevel@tonic-gate STRUCT_SET_HANDLE(lperm, model, perm); 5530Sstevel@tonic-gate STRUCT_FSET(lperm, uid, kperm->ipc_uid); 5540Sstevel@tonic-gate STRUCT_FSET(lperm, gid, kperm->ipc_gid); 5550Sstevel@tonic-gate STRUCT_FSET(lperm, cuid, 
kperm->ipc_cuid); 5560Sstevel@tonic-gate STRUCT_FSET(lperm, cgid, kperm->ipc_cgid); 5570Sstevel@tonic-gate STRUCT_FSET(lperm, mode, kperm->ipc_mode); 5580Sstevel@tonic-gate STRUCT_FSET(lperm, seq, 0); 5590Sstevel@tonic-gate STRUCT_FSET(lperm, key, kperm->ipc_key); 5600Sstevel@tonic-gate } 5610Sstevel@tonic-gate 5620Sstevel@tonic-gate int 5630Sstevel@tonic-gate ipcperm_set64(ipc_service_t *service, struct cred *cr, 5640Sstevel@tonic-gate kipc_perm_t *kperm, ipc_perm64_t *perm64) 5650Sstevel@tonic-gate { 5660Sstevel@tonic-gate ASSERT(IPC_LOCKED(service, kperm)); 5670Sstevel@tonic-gate 5680Sstevel@tonic-gate if (secpolicy_ipc_owner(cr, kperm) != 0) 5690Sstevel@tonic-gate return (EPERM); 5700Sstevel@tonic-gate 5710Sstevel@tonic-gate if ((perm64->ipcx_uid < 0) || (perm64->ipcx_uid > MAXUID) || 5720Sstevel@tonic-gate (perm64->ipcx_gid < 0) || (perm64->ipcx_gid > MAXUID)) 5730Sstevel@tonic-gate return (EINVAL); 5740Sstevel@tonic-gate 5750Sstevel@tonic-gate kperm->ipc_uid = perm64->ipcx_uid; 5760Sstevel@tonic-gate kperm->ipc_gid = perm64->ipcx_gid; 5770Sstevel@tonic-gate kperm->ipc_mode = (perm64->ipcx_mode & 0777) | 5780Sstevel@tonic-gate (kperm->ipc_mode & ~0777); 5790Sstevel@tonic-gate 5800Sstevel@tonic-gate #ifdef C2_AUDIT 5810Sstevel@tonic-gate if (audit_active) 5820Sstevel@tonic-gate audit_ipcget(service->ipcs_atype, kperm); 5830Sstevel@tonic-gate #endif 5840Sstevel@tonic-gate 5850Sstevel@tonic-gate return (0); 5860Sstevel@tonic-gate } 5870Sstevel@tonic-gate 5880Sstevel@tonic-gate void 5890Sstevel@tonic-gate ipcperm_stat64(ipc_perm64_t *perm64, kipc_perm_t *kperm) 5900Sstevel@tonic-gate { 5910Sstevel@tonic-gate perm64->ipcx_uid = kperm->ipc_uid; 5920Sstevel@tonic-gate perm64->ipcx_gid = kperm->ipc_gid; 5930Sstevel@tonic-gate perm64->ipcx_cuid = kperm->ipc_cuid; 5940Sstevel@tonic-gate perm64->ipcx_cgid = kperm->ipc_cgid; 5950Sstevel@tonic-gate perm64->ipcx_mode = kperm->ipc_mode; 5960Sstevel@tonic-gate perm64->ipcx_key = kperm->ipc_key; 5970Sstevel@tonic-gate 
perm64->ipcx_projid = kperm->ipc_proj->kpj_id; 5980Sstevel@tonic-gate perm64->ipcx_zoneid = kperm->ipc_zoneid; 5990Sstevel@tonic-gate } 6000Sstevel@tonic-gate 6010Sstevel@tonic-gate 6020Sstevel@tonic-gate /* 6030Sstevel@tonic-gate * ipc key comparator. 6040Sstevel@tonic-gate */ 6050Sstevel@tonic-gate static int 6060Sstevel@tonic-gate ipc_key_compar(const void *a, const void *b) 6070Sstevel@tonic-gate { 6080Sstevel@tonic-gate kipc_perm_t *aperm = (kipc_perm_t *)a; 6090Sstevel@tonic-gate kipc_perm_t *bperm = (kipc_perm_t *)b; 6100Sstevel@tonic-gate int ak = aperm->ipc_key; 6110Sstevel@tonic-gate int bk = bperm->ipc_key; 6120Sstevel@tonic-gate zoneid_t az; 6130Sstevel@tonic-gate zoneid_t bz; 6140Sstevel@tonic-gate 6150Sstevel@tonic-gate ASSERT(ak != IPC_PRIVATE); 6160Sstevel@tonic-gate ASSERT(bk != IPC_PRIVATE); 6170Sstevel@tonic-gate 6180Sstevel@tonic-gate /* 6190Sstevel@tonic-gate * Compare key first, then zoneid. This optimizes performance for 6200Sstevel@tonic-gate * systems with only one zone, since the zone checks will only be 6210Sstevel@tonic-gate * made when the keys match. 6220Sstevel@tonic-gate */ 6230Sstevel@tonic-gate if (ak < bk) 6240Sstevel@tonic-gate return (-1); 6250Sstevel@tonic-gate if (ak > bk) 6260Sstevel@tonic-gate return (1); 6270Sstevel@tonic-gate 6280Sstevel@tonic-gate /* keys match */ 6290Sstevel@tonic-gate az = aperm->ipc_zoneid; 6300Sstevel@tonic-gate bz = bperm->ipc_zoneid; 6310Sstevel@tonic-gate if (az < bz) 6320Sstevel@tonic-gate return (-1); 6330Sstevel@tonic-gate if (az > bz) 6340Sstevel@tonic-gate return (1); 6350Sstevel@tonic-gate return (0); 6360Sstevel@tonic-gate } 6370Sstevel@tonic-gate 6380Sstevel@tonic-gate /* 6390Sstevel@tonic-gate * Create an ipc service. 
6400Sstevel@tonic-gate */ 6410Sstevel@tonic-gate ipc_service_t * 642*2677Sml93401 ipcs_create(const char *name, rctl_hndl_t proj_rctl, rctl_hndl_t zone_rctl, 643*2677Sml93401 size_t size, ipc_func_t *dtor, ipc_func_t *rmid, int audit_type, 644*2677Sml93401 size_t rctl_offset) 6450Sstevel@tonic-gate { 6460Sstevel@tonic-gate ipc_service_t *result; 6470Sstevel@tonic-gate 6480Sstevel@tonic-gate result = kmem_alloc(sizeof (ipc_service_t), KM_SLEEP); 6490Sstevel@tonic-gate 6500Sstevel@tonic-gate mutex_init(&result->ipcs_lock, NULL, MUTEX_ADAPTIVE, NULL); 6510Sstevel@tonic-gate result->ipcs_count = 0; 6520Sstevel@tonic-gate avl_create(&result->ipcs_keys, ipc_key_compar, size, 0); 6530Sstevel@tonic-gate result->ipcs_tabsz = IPC_IDS_MIN; 6540Sstevel@tonic-gate result->ipcs_table = 6550Sstevel@tonic-gate kmem_zalloc(IPC_IDS_MIN * sizeof (ipc_slot_t), KM_SLEEP); 6560Sstevel@tonic-gate result->ipcs_ssize = size; 6570Sstevel@tonic-gate result->ipcs_ids = id_space_create(name, 0, IPC_IDS_MIN); 6580Sstevel@tonic-gate result->ipcs_dtor = dtor; 6590Sstevel@tonic-gate result->ipcs_rmid = rmid; 660*2677Sml93401 result->ipcs_proj_rctl = proj_rctl; 661*2677Sml93401 result->ipcs_zone_rctl = zone_rctl; 6620Sstevel@tonic-gate result->ipcs_atype = audit_type; 663*2677Sml93401 ASSERT(rctl_offset < sizeof (ipc_rqty_t)); 6640Sstevel@tonic-gate result->ipcs_rctlofs = rctl_offset; 6650Sstevel@tonic-gate list_create(&result->ipcs_usedids, sizeof (kipc_perm_t), 6660Sstevel@tonic-gate offsetof(kipc_perm_t, ipc_list)); 6670Sstevel@tonic-gate 6680Sstevel@tonic-gate return (result); 6690Sstevel@tonic-gate } 6700Sstevel@tonic-gate 6710Sstevel@tonic-gate /* 6720Sstevel@tonic-gate * Destroy an ipc service. 
6730Sstevel@tonic-gate */ 6740Sstevel@tonic-gate void 6750Sstevel@tonic-gate ipcs_destroy(ipc_service_t *service) 6760Sstevel@tonic-gate { 6770Sstevel@tonic-gate ipc_slot_t *slot, *next; 6780Sstevel@tonic-gate 6790Sstevel@tonic-gate mutex_enter(&service->ipcs_lock); 6800Sstevel@tonic-gate 6810Sstevel@tonic-gate ASSERT(service->ipcs_count == 0); 6820Sstevel@tonic-gate avl_destroy(&service->ipcs_keys); 6830Sstevel@tonic-gate list_destroy(&service->ipcs_usedids); 6840Sstevel@tonic-gate id_space_destroy(service->ipcs_ids); 6850Sstevel@tonic-gate 6860Sstevel@tonic-gate for (slot = service->ipcs_table; slot; slot = next) { 6870Sstevel@tonic-gate next = slot[0].ipct_chain; 6880Sstevel@tonic-gate kmem_free(slot, service->ipcs_tabsz * sizeof (ipc_slot_t)); 6890Sstevel@tonic-gate service->ipcs_tabsz >>= 1; 6900Sstevel@tonic-gate } 6910Sstevel@tonic-gate 6920Sstevel@tonic-gate mutex_destroy(&service->ipcs_lock); 6930Sstevel@tonic-gate kmem_free(service, sizeof (ipc_service_t)); 6940Sstevel@tonic-gate } 6950Sstevel@tonic-gate 6960Sstevel@tonic-gate /* 6970Sstevel@tonic-gate * Takes the service lock. 6980Sstevel@tonic-gate */ 6990Sstevel@tonic-gate void 7000Sstevel@tonic-gate ipcs_lock(ipc_service_t *service) 7010Sstevel@tonic-gate { 7020Sstevel@tonic-gate mutex_enter(&service->ipcs_lock); 7030Sstevel@tonic-gate } 7040Sstevel@tonic-gate 7050Sstevel@tonic-gate /* 7060Sstevel@tonic-gate * Releases the service lock. 7070Sstevel@tonic-gate */ 7080Sstevel@tonic-gate void 7090Sstevel@tonic-gate ipcs_unlock(ipc_service_t *service) 7100Sstevel@tonic-gate { 7110Sstevel@tonic-gate mutex_exit(&service->ipcs_lock); 7120Sstevel@tonic-gate } 7130Sstevel@tonic-gate 7140Sstevel@tonic-gate 7150Sstevel@tonic-gate /* 7160Sstevel@tonic-gate * Locks the specified ID. Returns the ID's ID table index. 
7170Sstevel@tonic-gate */ 7180Sstevel@tonic-gate static int 7190Sstevel@tonic-gate ipc_lock_internal(ipc_service_t *service, uint_t id) 7200Sstevel@tonic-gate { 7210Sstevel@tonic-gate uint_t tabsz; 7220Sstevel@tonic-gate uint_t index; 7230Sstevel@tonic-gate kmutex_t *mutex; 7240Sstevel@tonic-gate 7250Sstevel@tonic-gate for (;;) { 7260Sstevel@tonic-gate tabsz = service->ipcs_tabsz; 7270Sstevel@tonic-gate membar_consumer(); 7280Sstevel@tonic-gate index = id & (tabsz - 1); 7290Sstevel@tonic-gate mutex = &service->ipcs_table[index].ipct_lock; 7300Sstevel@tonic-gate mutex_enter(mutex); 7310Sstevel@tonic-gate if (tabsz == service->ipcs_tabsz) 7320Sstevel@tonic-gate break; 7330Sstevel@tonic-gate mutex_exit(mutex); 7340Sstevel@tonic-gate } 7350Sstevel@tonic-gate 7360Sstevel@tonic-gate return (index); 7370Sstevel@tonic-gate } 7380Sstevel@tonic-gate 7390Sstevel@tonic-gate /* 7400Sstevel@tonic-gate * Locks the specified ID. Returns a pointer to the ID's lock. 7410Sstevel@tonic-gate */ 7420Sstevel@tonic-gate kmutex_t * 7430Sstevel@tonic-gate ipc_lock(ipc_service_t *service, int id) 7440Sstevel@tonic-gate { 7450Sstevel@tonic-gate uint_t index; 7460Sstevel@tonic-gate 7470Sstevel@tonic-gate /* 7480Sstevel@tonic-gate * These assertions don't reflect requirements of the code 7490Sstevel@tonic-gate * which follows, but they should never fail nonetheless. 7500Sstevel@tonic-gate */ 7510Sstevel@tonic-gate ASSERT(id >= 0); 7520Sstevel@tonic-gate ASSERT(IPC_INDEX(id) < service->ipcs_tabsz); 7530Sstevel@tonic-gate index = ipc_lock_internal(service, id); 7540Sstevel@tonic-gate 7550Sstevel@tonic-gate return (&service->ipcs_table[index].ipct_lock); 7560Sstevel@tonic-gate } 7570Sstevel@tonic-gate 7580Sstevel@tonic-gate /* 7590Sstevel@tonic-gate * Checks to see if the held lock provided is the current lock for the 7600Sstevel@tonic-gate * specified id. If so, we return it instead of dropping it and 7610Sstevel@tonic-gate * returning the result of ipc_lock. 
This is intended to speed up cv 7620Sstevel@tonic-gate * wakeups where we are left holding a lock which could be stale, but 7630Sstevel@tonic-gate * probably isn't. 7640Sstevel@tonic-gate */ 7650Sstevel@tonic-gate kmutex_t * 7660Sstevel@tonic-gate ipc_relock(ipc_service_t *service, int id, kmutex_t *lock) 7670Sstevel@tonic-gate { 7680Sstevel@tonic-gate ASSERT(id >= 0); 7690Sstevel@tonic-gate ASSERT(IPC_INDEX(id) < service->ipcs_tabsz); 7700Sstevel@tonic-gate ASSERT(MUTEX_HELD(lock)); 7710Sstevel@tonic-gate 7720Sstevel@tonic-gate if (&service->ipcs_table[IPC_INDEX(id)].ipct_lock == lock) 7730Sstevel@tonic-gate return (lock); 7740Sstevel@tonic-gate 7750Sstevel@tonic-gate mutex_exit(lock); 7760Sstevel@tonic-gate return (ipc_lock(service, id)); 7770Sstevel@tonic-gate } 7780Sstevel@tonic-gate 7790Sstevel@tonic-gate /* 7800Sstevel@tonic-gate * Performs an ID lookup. If the ID doesn't exist or has been removed, 7810Sstevel@tonic-gate * or isn't visible to the caller (because of zones), NULL is returned. 7820Sstevel@tonic-gate * Otherwise, a pointer to the ID's perm structure and held ID lock are 7830Sstevel@tonic-gate * returned. 7840Sstevel@tonic-gate */ 7850Sstevel@tonic-gate kmutex_t * 7860Sstevel@tonic-gate ipc_lookup(ipc_service_t *service, int id, kipc_perm_t **perm) 7870Sstevel@tonic-gate { 7880Sstevel@tonic-gate kipc_perm_t *result; 7890Sstevel@tonic-gate uint_t index; 7900Sstevel@tonic-gate 7910Sstevel@tonic-gate /* 7920Sstevel@tonic-gate * There is no need to check to see if id is in-range (i.e. 7930Sstevel@tonic-gate * positive and fits into the table). If it is out-of-range, 7940Sstevel@tonic-gate * the id simply won't match the object's. 
7950Sstevel@tonic-gate */ 7960Sstevel@tonic-gate 7970Sstevel@tonic-gate index = ipc_lock_internal(service, id); 7980Sstevel@tonic-gate result = service->ipcs_table[index].ipct_data; 7990Sstevel@tonic-gate if (result == NULL || result->ipc_id != (uint_t)id || 8000Sstevel@tonic-gate !HASZONEACCESS(curproc, result->ipc_zoneid)) { 8010Sstevel@tonic-gate mutex_exit(&service->ipcs_table[index].ipct_lock); 8020Sstevel@tonic-gate return (NULL); 8030Sstevel@tonic-gate } 8040Sstevel@tonic-gate 8050Sstevel@tonic-gate ASSERT(IPC_SEQ(id) == service->ipcs_table[index].ipct_seq); 8060Sstevel@tonic-gate 8070Sstevel@tonic-gate *perm = result; 8080Sstevel@tonic-gate #ifdef C2_AUDIT 8090Sstevel@tonic-gate if (audit_active) 8100Sstevel@tonic-gate audit_ipc(service->ipcs_atype, id, result); 8110Sstevel@tonic-gate #endif 8120Sstevel@tonic-gate 8130Sstevel@tonic-gate return (&service->ipcs_table[index].ipct_lock); 8140Sstevel@tonic-gate } 8150Sstevel@tonic-gate 8160Sstevel@tonic-gate /* 8170Sstevel@tonic-gate * Increase the reference count on an ID. 8180Sstevel@tonic-gate */ 8190Sstevel@tonic-gate /*ARGSUSED*/ 8200Sstevel@tonic-gate void 8210Sstevel@tonic-gate ipc_hold(ipc_service_t *s, kipc_perm_t *perm) 8220Sstevel@tonic-gate { 8230Sstevel@tonic-gate ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz); 8240Sstevel@tonic-gate ASSERT(IPC_LOCKED(s, perm)); 8250Sstevel@tonic-gate perm->ipc_ref++; 8260Sstevel@tonic-gate } 8270Sstevel@tonic-gate 8280Sstevel@tonic-gate /* 8290Sstevel@tonic-gate * Decrease the reference count on an ID and drops the ID's lock. 8300Sstevel@tonic-gate * Destroys the ID if the new reference count is zero. 
8310Sstevel@tonic-gate */ 8320Sstevel@tonic-gate void 8330Sstevel@tonic-gate ipc_rele(ipc_service_t *s, kipc_perm_t *perm) 8340Sstevel@tonic-gate { 8350Sstevel@tonic-gate int nref; 8360Sstevel@tonic-gate 8370Sstevel@tonic-gate ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz); 8380Sstevel@tonic-gate ASSERT(IPC_LOCKED(s, perm)); 8390Sstevel@tonic-gate ASSERT(perm->ipc_ref > 0); 8400Sstevel@tonic-gate 8410Sstevel@tonic-gate nref = --perm->ipc_ref; 8420Sstevel@tonic-gate mutex_exit(&s->ipcs_table[IPC_INDEX(perm->ipc_id)].ipct_lock); 8430Sstevel@tonic-gate 8440Sstevel@tonic-gate if (nref == 0) { 8450Sstevel@tonic-gate ASSERT(IPC_FREE(perm)); /* ipc_rmid clears IPC_ALLOC */ 8460Sstevel@tonic-gate s->ipcs_dtor(perm); 8470Sstevel@tonic-gate project_rele(perm->ipc_proj); 848*2677Sml93401 zone_rele(perm->ipc_zone); 8490Sstevel@tonic-gate kmem_free(perm, s->ipcs_ssize); 8500Sstevel@tonic-gate } 8510Sstevel@tonic-gate } 8520Sstevel@tonic-gate 8530Sstevel@tonic-gate /* 8540Sstevel@tonic-gate * Decrease the reference count on an ID, but don't drop the ID lock. 8550Sstevel@tonic-gate * Used in cases where one thread needs to remove many references (on 8560Sstevel@tonic-gate * behalf of other parties). 8570Sstevel@tonic-gate */ 8580Sstevel@tonic-gate void 8590Sstevel@tonic-gate ipc_rele_locked(ipc_service_t *s, kipc_perm_t *perm) 8600Sstevel@tonic-gate { 8610Sstevel@tonic-gate ASSERT(perm->ipc_ref > 1); 8620Sstevel@tonic-gate ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz); 8630Sstevel@tonic-gate ASSERT(IPC_LOCKED(s, perm)); 8640Sstevel@tonic-gate 8650Sstevel@tonic-gate perm->ipc_ref--; 8660Sstevel@tonic-gate } 8670Sstevel@tonic-gate 8680Sstevel@tonic-gate 8690Sstevel@tonic-gate /* 8700Sstevel@tonic-gate * Internal function to grow the service ID table. 
8710Sstevel@tonic-gate */ 8720Sstevel@tonic-gate static int 8730Sstevel@tonic-gate ipc_grow(ipc_service_t *service) 8740Sstevel@tonic-gate { 8750Sstevel@tonic-gate ipc_slot_t *new, *old; 8760Sstevel@tonic-gate int i, oldsize, newsize; 8770Sstevel@tonic-gate 8780Sstevel@tonic-gate ASSERT(MUTEX_HELD(&service->ipcs_lock)); 8790Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 8800Sstevel@tonic-gate 8810Sstevel@tonic-gate if (service->ipcs_tabsz == IPC_IDS_MAX) 8820Sstevel@tonic-gate return (ENOSPC); 8830Sstevel@tonic-gate 8840Sstevel@tonic-gate oldsize = service->ipcs_tabsz; 8850Sstevel@tonic-gate newsize = oldsize << 1; 8860Sstevel@tonic-gate new = kmem_zalloc(newsize * sizeof (ipc_slot_t), KM_NOSLEEP); 8870Sstevel@tonic-gate if (new == NULL) 8880Sstevel@tonic-gate return (ENOSPC); 8890Sstevel@tonic-gate 8900Sstevel@tonic-gate old = service->ipcs_table; 8910Sstevel@tonic-gate for (i = 0; i < oldsize; i++) { 8920Sstevel@tonic-gate mutex_enter(&old[i].ipct_lock); 8930Sstevel@tonic-gate mutex_enter(&new[i].ipct_lock); 8940Sstevel@tonic-gate 8950Sstevel@tonic-gate new[i].ipct_seq = old[i].ipct_seq; 8960Sstevel@tonic-gate new[i].ipct_data = old[i].ipct_data; 8970Sstevel@tonic-gate old[i].ipct_data = NULL; 8980Sstevel@tonic-gate } 8990Sstevel@tonic-gate 9000Sstevel@tonic-gate new[0].ipct_chain = old; 9010Sstevel@tonic-gate service->ipcs_table = new; 9020Sstevel@tonic-gate membar_producer(); 9030Sstevel@tonic-gate service->ipcs_tabsz = newsize; 9040Sstevel@tonic-gate 9050Sstevel@tonic-gate for (i = 0; i < oldsize; i++) { 9060Sstevel@tonic-gate mutex_exit(&old[i].ipct_lock); 9070Sstevel@tonic-gate mutex_exit(&new[i].ipct_lock); 9080Sstevel@tonic-gate } 9090Sstevel@tonic-gate 9100Sstevel@tonic-gate id_space_extend(service->ipcs_ids, oldsize, service->ipcs_tabsz); 9110Sstevel@tonic-gate 9120Sstevel@tonic-gate return (0); 9130Sstevel@tonic-gate } 9140Sstevel@tonic-gate 9150Sstevel@tonic-gate 9160Sstevel@tonic-gate static int 9170Sstevel@tonic-gate 
ipc_keylookup(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp) 9180Sstevel@tonic-gate { 9190Sstevel@tonic-gate kipc_perm_t *perm = NULL; 9200Sstevel@tonic-gate avl_index_t where; 9210Sstevel@tonic-gate kipc_perm_t template; 9220Sstevel@tonic-gate 9230Sstevel@tonic-gate ASSERT(MUTEX_HELD(&service->ipcs_lock)); 9240Sstevel@tonic-gate 9250Sstevel@tonic-gate template.ipc_key = key; 9260Sstevel@tonic-gate template.ipc_zoneid = getzoneid(); 9270Sstevel@tonic-gate if (perm = avl_find(&service->ipcs_keys, &template, &where)) { 9280Sstevel@tonic-gate ASSERT(!IPC_FREE(perm)); 9290Sstevel@tonic-gate if ((flag & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL)) 9300Sstevel@tonic-gate return (EEXIST); 9310Sstevel@tonic-gate if ((flag & 0777) & ~perm->ipc_mode) { 9320Sstevel@tonic-gate #ifdef C2_AUDIT 9330Sstevel@tonic-gate if (audit_active) 9340Sstevel@tonic-gate audit_ipcget(NULL, (void *)perm); 9350Sstevel@tonic-gate #endif 9360Sstevel@tonic-gate return (EACCES); 9370Sstevel@tonic-gate } 9380Sstevel@tonic-gate *permp = perm; 9390Sstevel@tonic-gate return (0); 9400Sstevel@tonic-gate } else if (flag & IPC_CREAT) { 9410Sstevel@tonic-gate *permp = NULL; 9420Sstevel@tonic-gate return (0); 9430Sstevel@tonic-gate } 9440Sstevel@tonic-gate return (ENOENT); 9450Sstevel@tonic-gate } 9460Sstevel@tonic-gate 9470Sstevel@tonic-gate static int 9480Sstevel@tonic-gate ipc_alloc_test(ipc_service_t *service, proc_t *pp) 9490Sstevel@tonic-gate { 9500Sstevel@tonic-gate ASSERT(MUTEX_HELD(&service->ipcs_lock)); 9510Sstevel@tonic-gate 9520Sstevel@tonic-gate /* 9530Sstevel@tonic-gate * Resizing the table first would result in a cleaner code 9540Sstevel@tonic-gate * path, but would also allow a user to (permanently) double 9550Sstevel@tonic-gate * the id table size in cases where the allocation would be 9560Sstevel@tonic-gate * denied. Hence we test the rctl first. 
9570Sstevel@tonic-gate */ 9580Sstevel@tonic-gate retry: 9590Sstevel@tonic-gate mutex_enter(&pp->p_lock); 960*2677Sml93401 if ((rctl_test(service->ipcs_proj_rctl, pp->p_task->tk_proj->kpj_rctls, 961*2677Sml93401 pp, 1, RCA_SAFE) & RCT_DENY) || 962*2677Sml93401 (rctl_test(service->ipcs_zone_rctl, pp->p_zone->zone_rctls, 963*2677Sml93401 pp, 1, RCA_SAFE) & RCT_DENY)) { 9640Sstevel@tonic-gate mutex_exit(&pp->p_lock); 9650Sstevel@tonic-gate return (ENOSPC); 9660Sstevel@tonic-gate } 9670Sstevel@tonic-gate 9680Sstevel@tonic-gate if (service->ipcs_count == service->ipcs_tabsz) { 9690Sstevel@tonic-gate int error; 9700Sstevel@tonic-gate 9710Sstevel@tonic-gate mutex_exit(&pp->p_lock); 9720Sstevel@tonic-gate if (error = ipc_grow(service)) 9730Sstevel@tonic-gate return (error); 9740Sstevel@tonic-gate goto retry; 9750Sstevel@tonic-gate } 9760Sstevel@tonic-gate 9770Sstevel@tonic-gate return (0); 9780Sstevel@tonic-gate } 9790Sstevel@tonic-gate 9800Sstevel@tonic-gate /* 9810Sstevel@tonic-gate * Given a key, search for or create the associated identifier. 9820Sstevel@tonic-gate * 9830Sstevel@tonic-gate * If IPC_CREAT is specified and the key isn't found, or if the key is 9840Sstevel@tonic-gate * equal to IPC_PRIVATE, we return 0 and place a pointer to a newly 9850Sstevel@tonic-gate * allocated object structure in permp. A pointer to the held service 9860Sstevel@tonic-gate * lock is placed in lockp. ipc_mode's IPC_ALLOC bit is clear. 9870Sstevel@tonic-gate * 9880Sstevel@tonic-gate * If the key is found and no error conditions arise, we return 0 and 9890Sstevel@tonic-gate * place a pointer to the existing object structure in permp. A 9900Sstevel@tonic-gate * pointer to the held ID lock is placed in lockp. ipc_mode's 9910Sstevel@tonic-gate * IPC_ALLOC bit is set. 9920Sstevel@tonic-gate * 9930Sstevel@tonic-gate * Otherwise, a non-zero errno value is returned. 
9940Sstevel@tonic-gate */ 9950Sstevel@tonic-gate int 9960Sstevel@tonic-gate ipc_get(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp, 9970Sstevel@tonic-gate kmutex_t **lockp) 9980Sstevel@tonic-gate { 9990Sstevel@tonic-gate kipc_perm_t *perm = NULL; 10000Sstevel@tonic-gate proc_t *pp = curproc; 10010Sstevel@tonic-gate int error, index; 10020Sstevel@tonic-gate cred_t *cr = CRED(); 10030Sstevel@tonic-gate 10040Sstevel@tonic-gate if (key != IPC_PRIVATE) { 10050Sstevel@tonic-gate 10060Sstevel@tonic-gate mutex_enter(&service->ipcs_lock); 10070Sstevel@tonic-gate error = ipc_keylookup(service, key, flag, &perm); 10080Sstevel@tonic-gate if (perm != NULL) 10090Sstevel@tonic-gate index = ipc_lock_internal(service, perm->ipc_id); 10100Sstevel@tonic-gate mutex_exit(&service->ipcs_lock); 10110Sstevel@tonic-gate 10120Sstevel@tonic-gate if (error) { 10130Sstevel@tonic-gate ASSERT(perm == NULL); 10140Sstevel@tonic-gate return (error); 10150Sstevel@tonic-gate } 10160Sstevel@tonic-gate 10170Sstevel@tonic-gate if (perm) { 10180Sstevel@tonic-gate ASSERT(!IPC_FREE(perm)); 10190Sstevel@tonic-gate *permp = perm; 10200Sstevel@tonic-gate *lockp = &service->ipcs_table[index].ipct_lock; 10210Sstevel@tonic-gate return (0); 10220Sstevel@tonic-gate } 10230Sstevel@tonic-gate 10240Sstevel@tonic-gate /* Key not found; fall through */ 10250Sstevel@tonic-gate } 10260Sstevel@tonic-gate 10270Sstevel@tonic-gate perm = kmem_zalloc(service->ipcs_ssize, KM_SLEEP); 10280Sstevel@tonic-gate 10290Sstevel@tonic-gate mutex_enter(&service->ipcs_lock); 10300Sstevel@tonic-gate if (error = ipc_alloc_test(service, pp)) { 10310Sstevel@tonic-gate mutex_exit(&service->ipcs_lock); 10320Sstevel@tonic-gate kmem_free(perm, service->ipcs_ssize); 10330Sstevel@tonic-gate return (error); 10340Sstevel@tonic-gate } 10350Sstevel@tonic-gate 10360Sstevel@tonic-gate perm->ipc_cuid = perm->ipc_uid = crgetuid(cr); 10370Sstevel@tonic-gate perm->ipc_cgid = perm->ipc_gid = crgetgid(cr); 10380Sstevel@tonic-gate 
perm->ipc_zoneid = getzoneid(); 10390Sstevel@tonic-gate perm->ipc_mode = flag & 0777; 10400Sstevel@tonic-gate perm->ipc_key = key; 10410Sstevel@tonic-gate perm->ipc_ref = 1; 10420Sstevel@tonic-gate perm->ipc_id = IPC_ID_INVAL; 10430Sstevel@tonic-gate *permp = perm; 10440Sstevel@tonic-gate *lockp = &service->ipcs_lock; 10450Sstevel@tonic-gate 10460Sstevel@tonic-gate return (0); 10470Sstevel@tonic-gate } 10480Sstevel@tonic-gate 10490Sstevel@tonic-gate /* 10500Sstevel@tonic-gate * Attempts to add the a newly created ID to the global namespace. If 10510Sstevel@tonic-gate * creating it would cause an error, we return the error. If there is 10520Sstevel@tonic-gate * the possibility that we could obtain the existing ID and return it 10530Sstevel@tonic-gate * to the user, we return EAGAIN. Otherwise, we return 0 with p_lock 10540Sstevel@tonic-gate * and the service lock held. 10550Sstevel@tonic-gate * 10560Sstevel@tonic-gate * Since this should be only called after all initialization has been 10570Sstevel@tonic-gate * completed, on failure we automatically invoke the destructor for the 10580Sstevel@tonic-gate * object and deallocate the memory associated with it. 10590Sstevel@tonic-gate */ 10600Sstevel@tonic-gate int 10610Sstevel@tonic-gate ipc_commit_begin(ipc_service_t *service, key_t key, int flag, 10620Sstevel@tonic-gate kipc_perm_t *newperm) 10630Sstevel@tonic-gate { 10640Sstevel@tonic-gate kipc_perm_t *perm; 10650Sstevel@tonic-gate int error; 10660Sstevel@tonic-gate proc_t *pp = curproc; 10670Sstevel@tonic-gate 10680Sstevel@tonic-gate ASSERT(newperm->ipc_ref == 1); 10690Sstevel@tonic-gate ASSERT(IPC_FREE(newperm)); 10700Sstevel@tonic-gate 10710Sstevel@tonic-gate mutex_enter(&service->ipcs_lock); 10720Sstevel@tonic-gate /* 10730Sstevel@tonic-gate * Ensure that no-one has raced with us and created the key. 
10740Sstevel@tonic-gate */ 10750Sstevel@tonic-gate if ((key != IPC_PRIVATE) && 10760Sstevel@tonic-gate (((error = ipc_keylookup(service, key, flag, &perm)) != 0) || 10770Sstevel@tonic-gate (perm != NULL))) { 10780Sstevel@tonic-gate error = error ? error : EAGAIN; 10790Sstevel@tonic-gate goto errout; 10800Sstevel@tonic-gate } 10810Sstevel@tonic-gate 10820Sstevel@tonic-gate /* 10830Sstevel@tonic-gate * Ensure that no-one has raced with us and used the last of 10840Sstevel@tonic-gate * the permissible ids, or the last of the free spaces in the 10850Sstevel@tonic-gate * id table. 10860Sstevel@tonic-gate */ 10870Sstevel@tonic-gate if (error = ipc_alloc_test(service, pp)) 10880Sstevel@tonic-gate goto errout; 10890Sstevel@tonic-gate 10900Sstevel@tonic-gate /* 10910Sstevel@tonic-gate * Set ipc_proj so ipc_cleanup cleans up necessary state. 10920Sstevel@tonic-gate */ 10930Sstevel@tonic-gate newperm->ipc_proj = pp->p_task->tk_proj; 1094*2677Sml93401 newperm->ipc_zone = pp->p_zone; 10950Sstevel@tonic-gate 10960Sstevel@tonic-gate ASSERT(MUTEX_HELD(&service->ipcs_lock)); 10970Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pp->p_lock)); 10980Sstevel@tonic-gate 10990Sstevel@tonic-gate return (0); 11000Sstevel@tonic-gate errout: 11010Sstevel@tonic-gate mutex_exit(&service->ipcs_lock); 11020Sstevel@tonic-gate service->ipcs_dtor(newperm); 11030Sstevel@tonic-gate kmem_free(newperm, service->ipcs_ssize); 11040Sstevel@tonic-gate return (error); 11050Sstevel@tonic-gate } 11060Sstevel@tonic-gate 11070Sstevel@tonic-gate /* 11080Sstevel@tonic-gate * Commit the ID allocation transaction. Called with p_lock and the 11090Sstevel@tonic-gate * service lock held, both of which are dropped. Returns the held ID 11100Sstevel@tonic-gate * lock so the caller can extract the ID and perform ipcget auditing. 
11110Sstevel@tonic-gate */ 11120Sstevel@tonic-gate kmutex_t * 11130Sstevel@tonic-gate ipc_commit_end(ipc_service_t *service, kipc_perm_t *perm) 11140Sstevel@tonic-gate { 11150Sstevel@tonic-gate ipc_slot_t *slot; 11160Sstevel@tonic-gate avl_index_t where; 11170Sstevel@tonic-gate int index; 11180Sstevel@tonic-gate void *loc; 11190Sstevel@tonic-gate 11200Sstevel@tonic-gate ASSERT(MUTEX_HELD(&service->ipcs_lock)); 11210Sstevel@tonic-gate ASSERT(MUTEX_HELD(&curproc->p_lock)); 11220Sstevel@tonic-gate 11230Sstevel@tonic-gate (void) project_hold(perm->ipc_proj); 1124*2677Sml93401 (void) zone_hold(perm->ipc_zone); 11250Sstevel@tonic-gate mutex_exit(&curproc->p_lock); 11260Sstevel@tonic-gate 11270Sstevel@tonic-gate /* 11280Sstevel@tonic-gate * Pick out our slot. 11290Sstevel@tonic-gate */ 11300Sstevel@tonic-gate service->ipcs_count++; 11310Sstevel@tonic-gate index = id_alloc(service->ipcs_ids); 11320Sstevel@tonic-gate ASSERT(index < service->ipcs_tabsz); 11330Sstevel@tonic-gate slot = &service->ipcs_table[index]; 11340Sstevel@tonic-gate mutex_enter(&slot->ipct_lock); 11350Sstevel@tonic-gate ASSERT(slot->ipct_data == NULL); 11360Sstevel@tonic-gate 11370Sstevel@tonic-gate /* 11380Sstevel@tonic-gate * Update the perm structure. 11390Sstevel@tonic-gate */ 11400Sstevel@tonic-gate perm->ipc_mode |= IPC_ALLOC; 11410Sstevel@tonic-gate perm->ipc_id = (slot->ipct_seq << IPC_SEQ_SHIFT) | index; 11420Sstevel@tonic-gate 11430Sstevel@tonic-gate /* 11440Sstevel@tonic-gate * Push into global visibility. 
11450Sstevel@tonic-gate */ 11460Sstevel@tonic-gate slot->ipct_data = perm; 11470Sstevel@tonic-gate if (perm->ipc_key != IPC_PRIVATE) { 11480Sstevel@tonic-gate loc = avl_find(&service->ipcs_keys, perm, &where); 11490Sstevel@tonic-gate ASSERT(loc == NULL); 11500Sstevel@tonic-gate avl_insert(&service->ipcs_keys, perm, where); 11510Sstevel@tonic-gate } 11520Sstevel@tonic-gate list_insert_head(&service->ipcs_usedids, perm); 11530Sstevel@tonic-gate 11540Sstevel@tonic-gate /* 11550Sstevel@tonic-gate * Update resource consumption. 11560Sstevel@tonic-gate */ 1157*2677Sml93401 IPC_PROJ_USAGE(perm, service) += 1; 1158*2677Sml93401 IPC_ZONE_USAGE(perm, service) += 1; 11590Sstevel@tonic-gate 11600Sstevel@tonic-gate mutex_exit(&service->ipcs_lock); 11610Sstevel@tonic-gate return (&slot->ipct_lock); 11620Sstevel@tonic-gate } 11630Sstevel@tonic-gate 11640Sstevel@tonic-gate /* 11650Sstevel@tonic-gate * Clean up function, in case the allocation fails. If called between 11660Sstevel@tonic-gate * ipc_lookup and ipc_commit_begin, perm->ipc_proj will be 0 and we 11670Sstevel@tonic-gate * merely free the perm structure. If called after ipc_commit_begin, 11680Sstevel@tonic-gate * we also drop locks and call the ID's destructor. 11690Sstevel@tonic-gate */ 11700Sstevel@tonic-gate void 11710Sstevel@tonic-gate ipc_cleanup(ipc_service_t *service, kipc_perm_t *perm) 11720Sstevel@tonic-gate { 11730Sstevel@tonic-gate ASSERT(IPC_FREE(perm)); 11740Sstevel@tonic-gate if (perm->ipc_proj) { 11750Sstevel@tonic-gate mutex_exit(&curproc->p_lock); 11760Sstevel@tonic-gate mutex_exit(&service->ipcs_lock); 11770Sstevel@tonic-gate service->ipcs_dtor(perm); 11780Sstevel@tonic-gate } 11790Sstevel@tonic-gate kmem_free(perm, service->ipcs_ssize); 11800Sstevel@tonic-gate } 11810Sstevel@tonic-gate 11820Sstevel@tonic-gate 11830Sstevel@tonic-gate /* 11840Sstevel@tonic-gate * Common code to remove an IPC object. 
This should be called after 11850Sstevel@tonic-gate * all permissions checks have been performed, and with the service 11860Sstevel@tonic-gate * and ID locked. Note that this does not remove the object from 11870Sstevel@tonic-gate * the ipcs_usedids list (this needs to be done by the caller before 11880Sstevel@tonic-gate * dropping the service lock). 11890Sstevel@tonic-gate */ 11900Sstevel@tonic-gate static void 11910Sstevel@tonic-gate ipc_remove(ipc_service_t *service, kipc_perm_t *perm) 11920Sstevel@tonic-gate { 11930Sstevel@tonic-gate int id = perm->ipc_id; 11940Sstevel@tonic-gate int index; 11950Sstevel@tonic-gate 11960Sstevel@tonic-gate ASSERT(MUTEX_HELD(&service->ipcs_lock)); 11970Sstevel@tonic-gate ASSERT(IPC_LOCKED(service, perm)); 11980Sstevel@tonic-gate 11990Sstevel@tonic-gate index = IPC_INDEX(id); 12000Sstevel@tonic-gate 12010Sstevel@tonic-gate service->ipcs_table[index].ipct_data = NULL; 12020Sstevel@tonic-gate 12030Sstevel@tonic-gate if (perm->ipc_key != IPC_PRIVATE) 12040Sstevel@tonic-gate avl_remove(&service->ipcs_keys, perm); 12050Sstevel@tonic-gate list_remove(&service->ipcs_usedids, perm); 12060Sstevel@tonic-gate perm->ipc_mode &= ~IPC_ALLOC; 12070Sstevel@tonic-gate 12080Sstevel@tonic-gate id_free(service->ipcs_ids, index); 12090Sstevel@tonic-gate 12100Sstevel@tonic-gate if (service->ipcs_table[index].ipct_seq++ == IPC_SEQ_MASK) 12110Sstevel@tonic-gate service->ipcs_table[index].ipct_seq = 0; 12120Sstevel@tonic-gate service->ipcs_count--; 1213*2677Sml93401 ASSERT(IPC_PROJ_USAGE(perm, service) > 0); 1214*2677Sml93401 ASSERT(IPC_ZONE_USAGE(perm, service) > 0); 1215*2677Sml93401 IPC_PROJ_USAGE(perm, service) -= 1; 1216*2677Sml93401 IPC_ZONE_USAGE(perm, service) -= 1; 1217*2677Sml93401 ASSERT(service->ipcs_count || ((IPC_PROJ_USAGE(perm, service) == 0) && 1218*2677Sml93401 (IPC_ZONE_USAGE(perm, service) == 0))); 12190Sstevel@tonic-gate } 12200Sstevel@tonic-gate 12210Sstevel@tonic-gate 12220Sstevel@tonic-gate /* 12230Sstevel@tonic-gate * Common code 
to perform an IPC_RMID. Returns an errno value on 12240Sstevel@tonic-gate * failure, 0 on success. 12250Sstevel@tonic-gate */ 12260Sstevel@tonic-gate int 12270Sstevel@tonic-gate ipc_rmid(ipc_service_t *service, int id, cred_t *cr) 12280Sstevel@tonic-gate { 12290Sstevel@tonic-gate kipc_perm_t *perm; 12300Sstevel@tonic-gate kmutex_t *lock; 12310Sstevel@tonic-gate 12320Sstevel@tonic-gate mutex_enter(&service->ipcs_lock); 12330Sstevel@tonic-gate 12340Sstevel@tonic-gate lock = ipc_lookup(service, id, &perm); 12350Sstevel@tonic-gate if (lock == NULL) { 12360Sstevel@tonic-gate mutex_exit(&service->ipcs_lock); 12370Sstevel@tonic-gate return (EINVAL); 12380Sstevel@tonic-gate } 12390Sstevel@tonic-gate 12400Sstevel@tonic-gate ASSERT(service->ipcs_count > 0); 12410Sstevel@tonic-gate 12420Sstevel@tonic-gate if (secpolicy_ipc_owner(cr, perm) != 0) { 12430Sstevel@tonic-gate mutex_exit(lock); 12440Sstevel@tonic-gate mutex_exit(&service->ipcs_lock); 12450Sstevel@tonic-gate return (EPERM); 12460Sstevel@tonic-gate } 12470Sstevel@tonic-gate 12480Sstevel@tonic-gate /* 12490Sstevel@tonic-gate * Nothing can fail from this point on. 12500Sstevel@tonic-gate */ 12510Sstevel@tonic-gate ipc_remove(service, perm); 12520Sstevel@tonic-gate mutex_exit(&service->ipcs_lock); 12530Sstevel@tonic-gate 12540Sstevel@tonic-gate /* perform any per-service removal actions */ 12550Sstevel@tonic-gate service->ipcs_rmid(perm); 12560Sstevel@tonic-gate 12570Sstevel@tonic-gate ipc_rele(service, perm); 12580Sstevel@tonic-gate 12590Sstevel@tonic-gate return (0); 12600Sstevel@tonic-gate } 12610Sstevel@tonic-gate 12620Sstevel@tonic-gate /* 12630Sstevel@tonic-gate * Implementation for shmids, semids, and msgids. buf is the address 12640Sstevel@tonic-gate * of the user buffer, nids is the size, and pnids is a pointer to 12650Sstevel@tonic-gate * where we write the actual number of ids that [would] have been 12660Sstevel@tonic-gate * copied out. 
12670Sstevel@tonic-gate */ 12680Sstevel@tonic-gate int 12690Sstevel@tonic-gate ipc_ids(ipc_service_t *service, int *buf, uint_t nids, uint_t *pnids) 12700Sstevel@tonic-gate { 12710Sstevel@tonic-gate kipc_perm_t *perm; 12720Sstevel@tonic-gate size_t idsize = 0; 12730Sstevel@tonic-gate int error = 0; 12740Sstevel@tonic-gate int idcount; 12750Sstevel@tonic-gate int *ids; 12760Sstevel@tonic-gate int numids = 0; 12770Sstevel@tonic-gate zoneid_t zoneid = getzoneid(); 12780Sstevel@tonic-gate int global = INGLOBALZONE(curproc); 12790Sstevel@tonic-gate 12800Sstevel@tonic-gate if (buf == NULL) 12810Sstevel@tonic-gate nids = 0; 12820Sstevel@tonic-gate 12830Sstevel@tonic-gate /* 12840Sstevel@tonic-gate * Get an accurate count of the total number of ids, and allocate a 12850Sstevel@tonic-gate * staging buffer. Since ipcs_count is always sane, we don't have 12860Sstevel@tonic-gate * to take ipcs_lock for our first guess. If there are no ids, or 12870Sstevel@tonic-gate * we're in the global zone and the number of ids is greater than 12880Sstevel@tonic-gate * the size of the specified buffer, we shunt to the end. Otherwise, 12890Sstevel@tonic-gate * we go through the id list looking for (and counting) what is 12900Sstevel@tonic-gate * visible in the specified zone. 
12910Sstevel@tonic-gate */ 12920Sstevel@tonic-gate idcount = service->ipcs_count; 12930Sstevel@tonic-gate for (;;) { 12940Sstevel@tonic-gate if ((global && idcount > nids) || idcount == 0) { 12950Sstevel@tonic-gate numids = idcount; 12960Sstevel@tonic-gate nids = 0; 12970Sstevel@tonic-gate goto out; 12980Sstevel@tonic-gate } 12990Sstevel@tonic-gate 13000Sstevel@tonic-gate idsize = idcount * sizeof (int); 13010Sstevel@tonic-gate ids = kmem_alloc(idsize, KM_SLEEP); 13020Sstevel@tonic-gate 13030Sstevel@tonic-gate mutex_enter(&service->ipcs_lock); 13040Sstevel@tonic-gate if (idcount >= service->ipcs_count) 13050Sstevel@tonic-gate break; 13060Sstevel@tonic-gate idcount = service->ipcs_count; 13070Sstevel@tonic-gate mutex_exit(&service->ipcs_lock); 13080Sstevel@tonic-gate 13090Sstevel@tonic-gate if (idsize != 0) { 13100Sstevel@tonic-gate kmem_free(ids, idsize); 13110Sstevel@tonic-gate idsize = 0; 13120Sstevel@tonic-gate } 13130Sstevel@tonic-gate } 13140Sstevel@tonic-gate 13150Sstevel@tonic-gate for (perm = list_head(&service->ipcs_usedids); perm != NULL; 13160Sstevel@tonic-gate perm = list_next(&service->ipcs_usedids, perm)) { 13170Sstevel@tonic-gate ASSERT(!IPC_FREE(perm)); 13180Sstevel@tonic-gate if (global || perm->ipc_zoneid == zoneid) 13190Sstevel@tonic-gate ids[numids++] = perm->ipc_id; 13200Sstevel@tonic-gate } 13210Sstevel@tonic-gate mutex_exit(&service->ipcs_lock); 13220Sstevel@tonic-gate 13230Sstevel@tonic-gate /* 13240Sstevel@tonic-gate * If there isn't enough space to hold all of the ids, just 13250Sstevel@tonic-gate * return the number of ids without copying out any of them. 
13260Sstevel@tonic-gate */ 13270Sstevel@tonic-gate if (nids < numids) 13280Sstevel@tonic-gate nids = 0; 13290Sstevel@tonic-gate 13300Sstevel@tonic-gate out: 13310Sstevel@tonic-gate if (suword32(pnids, (uint32_t)numids) || 13320Sstevel@tonic-gate (nids != 0 && copyout(ids, buf, numids * sizeof (int)))) 13330Sstevel@tonic-gate error = EFAULT; 13340Sstevel@tonic-gate if (idsize != 0) 13350Sstevel@tonic-gate kmem_free(ids, idsize); 13360Sstevel@tonic-gate return (error); 13370Sstevel@tonic-gate } 13380Sstevel@tonic-gate 13390Sstevel@tonic-gate /* 13400Sstevel@tonic-gate * Destroy IPC objects from the given service that are associated with 13410Sstevel@tonic-gate * the given zone. 13420Sstevel@tonic-gate * 13430Sstevel@tonic-gate * We can't hold on to the service lock when freeing objects, so we 13440Sstevel@tonic-gate * first search the service and move all the objects to a private 13450Sstevel@tonic-gate * list, then walk through and free them after dropping the lock. 13460Sstevel@tonic-gate */ 13470Sstevel@tonic-gate void 13480Sstevel@tonic-gate ipc_remove_zone(ipc_service_t *service, zoneid_t zoneid) 13490Sstevel@tonic-gate { 13500Sstevel@tonic-gate kipc_perm_t *perm, *next; 13510Sstevel@tonic-gate list_t rmlist; 13520Sstevel@tonic-gate kmutex_t *lock; 13530Sstevel@tonic-gate 13540Sstevel@tonic-gate list_create(&rmlist, sizeof (kipc_perm_t), 13550Sstevel@tonic-gate offsetof(kipc_perm_t, ipc_list)); 13560Sstevel@tonic-gate 13570Sstevel@tonic-gate mutex_enter(&service->ipcs_lock); 13580Sstevel@tonic-gate for (perm = list_head(&service->ipcs_usedids); perm != NULL; 13590Sstevel@tonic-gate perm = next) { 13600Sstevel@tonic-gate next = list_next(&service->ipcs_usedids, perm); 13610Sstevel@tonic-gate if (perm->ipc_zoneid != zoneid) 13620Sstevel@tonic-gate continue; 13630Sstevel@tonic-gate 13640Sstevel@tonic-gate /* 13650Sstevel@tonic-gate * Remove the object from the service, then put it on 13660Sstevel@tonic-gate * the removal list so we can defer the call to 
13670Sstevel@tonic-gate * ipc_rele (which will actually free the structure). 13680Sstevel@tonic-gate * We need to do this since the destructor may grab 13690Sstevel@tonic-gate * the service lock. 13700Sstevel@tonic-gate */ 13710Sstevel@tonic-gate ASSERT(!IPC_FREE(perm)); 13720Sstevel@tonic-gate lock = ipc_lock(service, perm->ipc_id); 13730Sstevel@tonic-gate ipc_remove(service, perm); 13740Sstevel@tonic-gate mutex_exit(lock); 13750Sstevel@tonic-gate list_insert_tail(&rmlist, perm); 13760Sstevel@tonic-gate } 13770Sstevel@tonic-gate mutex_exit(&service->ipcs_lock); 13780Sstevel@tonic-gate 13790Sstevel@tonic-gate /* 13800Sstevel@tonic-gate * Now that we've dropped the service lock, loop through the 13810Sstevel@tonic-gate * private list freeing removed objects. 13820Sstevel@tonic-gate */ 13830Sstevel@tonic-gate for (perm = list_head(&rmlist); perm != NULL; perm = next) { 13840Sstevel@tonic-gate next = list_next(&rmlist, perm); 13850Sstevel@tonic-gate list_remove(&rmlist, perm); 13860Sstevel@tonic-gate 13870Sstevel@tonic-gate (void) ipc_lock(service, perm->ipc_id); 13880Sstevel@tonic-gate 13890Sstevel@tonic-gate /* perform any per-service removal actions */ 13900Sstevel@tonic-gate service->ipcs_rmid(perm); 13910Sstevel@tonic-gate 13920Sstevel@tonic-gate /* release reference */ 13930Sstevel@tonic-gate ipc_rele(service, perm); 13940Sstevel@tonic-gate } 13950Sstevel@tonic-gate 13960Sstevel@tonic-gate list_destroy(&rmlist); 13970Sstevel@tonic-gate } 1398