10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
52677Sml93401 * Common Development and Distribution License (the "License").
62677Sml93401 * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate /*
22*13096SJordan.Vaughan@Sun.com * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
230Sstevel@tonic-gate */
240Sstevel@tonic-gate
2511861SMarek.Pospisil@Sun.COM /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
2611861SMarek.Pospisil@Sun.COM /* All Rights Reserved */
270Sstevel@tonic-gate
280Sstevel@tonic-gate
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate * Common Inter-Process Communication routines.
310Sstevel@tonic-gate *
320Sstevel@tonic-gate * Overview
330Sstevel@tonic-gate * --------
340Sstevel@tonic-gate *
 * The System V inter-process communication (IPC) facilities provide
 * three services, message queues, semaphore arrays, and shared memory
 * segments, which are managed using filesystem-like namespaces.
 * Unlike a filesystem, these namespaces aren't mounted and accessible
 * via a path -- a special API is used to interact with the different
 * facilities (nothing precludes a VFS-based interface, but the
 * standards require the special APIs). Furthermore, these special
 * APIs don't use file descriptors, nor do they have an equivalent.
 * This means that every operation which acts on an object needs to
 * perform the equivalent of a lookup, which in turn means that every
 * operation can fail if the specified object doesn't exist in the
 * facility's namespace.
470Sstevel@tonic-gate *
480Sstevel@tonic-gate * Objects
490Sstevel@tonic-gate * -------
500Sstevel@tonic-gate *
510Sstevel@tonic-gate * Each object in a namespace has a unique ID, which is assigned by the
520Sstevel@tonic-gate * system and is used to identify the object when performing operations
530Sstevel@tonic-gate * on it. An object can also have a key, which is selected by the user
540Sstevel@tonic-gate * at allocation time and is used as a primitive rendezvous mechanism.
550Sstevel@tonic-gate * An object without a key is said to have a "private" key.
560Sstevel@tonic-gate *
570Sstevel@tonic-gate * To perform an operation on an object given its key, one must first
580Sstevel@tonic-gate * perform a lookup and obtain its ID. The ID is then used to identify
590Sstevel@tonic-gate * the object when performing the operation. If the object has a
600Sstevel@tonic-gate * private key, the ID must be known or obtained by other means.
610Sstevel@tonic-gate *
620Sstevel@tonic-gate * Each object in the namespace has a creator uid and gid, as well as
630Sstevel@tonic-gate * an owner uid and gid. Both are initialized with the ruid and rgid
640Sstevel@tonic-gate * of the process which created the object. The creator or current
650Sstevel@tonic-gate * owner has the ability to change the owner of the object.
660Sstevel@tonic-gate *
670Sstevel@tonic-gate * Each object in the namespace has a set of file-like permissions,
680Sstevel@tonic-gate * which, in conjunction with the creator and owner uid and gid,
690Sstevel@tonic-gate * control read and write access to the object (execute is ignored).
700Sstevel@tonic-gate *
712677Sml93401 * Each object also has a creator project and zone, which are used to
722677Sml93401 * account for its resource usage.
730Sstevel@tonic-gate *
740Sstevel@tonic-gate * Operations
750Sstevel@tonic-gate * ----------
760Sstevel@tonic-gate *
770Sstevel@tonic-gate * There are five operations which all three facilities have in
780Sstevel@tonic-gate * common: GET, SET, STAT, RMID, and IDS.
790Sstevel@tonic-gate *
800Sstevel@tonic-gate * GET, like open, is used to allocate a new object or obtain an
810Sstevel@tonic-gate * existing one (using its key). It takes a key, a set of flags and
820Sstevel@tonic-gate * mode bits, and optionally facility-specific arguments. If the key
830Sstevel@tonic-gate * is IPC_PRIVATE, a new object with the requested mode bits and
840Sstevel@tonic-gate * facility-specific attributes is created. If the key isn't
850Sstevel@tonic-gate * IPC_PRIVATE, the GET will attempt to look up the specified key and
860Sstevel@tonic-gate * either return that or create a new key depending on the state of the
870Sstevel@tonic-gate * IPC_CREAT and IPC_EXCL flags, much like open. If GET needs to
880Sstevel@tonic-gate * allocate an object, it can fail if there is insufficient space in
890Sstevel@tonic-gate * the namespace (the maximum number of ids for the facility has been
900Sstevel@tonic-gate * exceeded) or if the facility-specific initialization fails. If GET
910Sstevel@tonic-gate * finds an object it can return, it can still fail if that object's
920Sstevel@tonic-gate * permissions or facility-specific attributes are less than those
930Sstevel@tonic-gate * requested.
940Sstevel@tonic-gate *
950Sstevel@tonic-gate * SET is used to adjust facility-specific parameters of an object, in
960Sstevel@tonic-gate * addition to the owner uid and gid, and mode bits. It can fail if
970Sstevel@tonic-gate * the caller isn't the creator or owner.
980Sstevel@tonic-gate *
 * STAT is used to obtain information about an object, including the
 * general object attributes described above as well as
 * facility-specific information. It can fail if the caller doesn't
 * have read permission.
1030Sstevel@tonic-gate *
1040Sstevel@tonic-gate * RMID removes an object from the namespace. Subsequent operations
1050Sstevel@tonic-gate * using the object's ID or key will fail (until another object is
1060Sstevel@tonic-gate * created with the same key or ID). Since an RMID may be performed
1070Sstevel@tonic-gate * asynchronously with other operations, it is possible that other
1080Sstevel@tonic-gate * threads and/or processes will have references to the object. While
1090Sstevel@tonic-gate * a facility may have actions which need to be performed at RMID time,
1100Sstevel@tonic-gate * only when all references are dropped can the object be destroyed.
1110Sstevel@tonic-gate * RMID will fail if the caller isn't the creator or owner.
1120Sstevel@tonic-gate *
1130Sstevel@tonic-gate * IDS obtains a list of all IDs in a facility's namespace. There are
1140Sstevel@tonic-gate * no facility-specific behaviors of IDS.
1150Sstevel@tonic-gate *
1160Sstevel@tonic-gate * Design
1170Sstevel@tonic-gate * ------
1180Sstevel@tonic-gate *
1190Sstevel@tonic-gate * Because some IPC facilities provide services whose operations must
1200Sstevel@tonic-gate * scale, a mechanism which allows fast, concurrent access to
1210Sstevel@tonic-gate * individual objects is needed. Of primary importance is object
1220Sstevel@tonic-gate * lookup based on ID (SET, STAT, others). Allocation (GET),
1230Sstevel@tonic-gate * deallocation (RMID), ID enumeration (IDS), and key lookups (GET) are
1240Sstevel@tonic-gate * lesser concerns, but should be implemented in such a way that ID
1250Sstevel@tonic-gate * lookup isn't affected (at least not in the common case).
1260Sstevel@tonic-gate *
1270Sstevel@tonic-gate * Starting from the bottom up, each object is represented by a
1280Sstevel@tonic-gate * structure, the first member of which must be a kipc_perm_t. The
1290Sstevel@tonic-gate * kipc_perm_t contains the information described above in "Objects", a
1300Sstevel@tonic-gate * reference count (since the object may continue to exist after it has
1310Sstevel@tonic-gate * been removed from the namespace), as well as some additional
1320Sstevel@tonic-gate * metadata used to manage data structure membership. These objects
1330Sstevel@tonic-gate * are dynamically allocated.
1340Sstevel@tonic-gate *
1350Sstevel@tonic-gate * Above the objects is a power-of-two sized table of ID slots. Each
1360Sstevel@tonic-gate * slot contains a pointer to an object, a sequence number, and a
1370Sstevel@tonic-gate * lock. An object's ID is a function of its slot's index in the table
1380Sstevel@tonic-gate * and its slot's sequence number. Every time a slot is released (via
1390Sstevel@tonic-gate * RMID) its sequence number is increased. Strictly speaking, the
1400Sstevel@tonic-gate * sequence number is unnecessary. However, checking the sequence
1410Sstevel@tonic-gate * number after a lookup provides a certain degree of robustness
1420Sstevel@tonic-gate * against the use of stale IDs (useful since nothing else does). When
1430Sstevel@tonic-gate * the table fills up, it is resized (see Locking, below).
1440Sstevel@tonic-gate *
1450Sstevel@tonic-gate * Of an ID's 31 bits (an ID is, as defined by the standards, a signed
1460Sstevel@tonic-gate * int) the top IPC_SEQ_BITS are used for the sequence number with the
1470Sstevel@tonic-gate * remainder holding the index into the table. The size of the table
1480Sstevel@tonic-gate * is therefore bounded at 2 ^ (31 - IPC_SEQ_BITS) slots.
1490Sstevel@tonic-gate *
1500Sstevel@tonic-gate * Managing this table is the ipc_service structure. It contains a
1510Sstevel@tonic-gate * pointer to the dynamically allocated ID table, a namespace-global
1520Sstevel@tonic-gate * lock, an id_space for managing the free space in the table, and
1530Sstevel@tonic-gate * sundry other metadata necessary for the maintenance of the
1540Sstevel@tonic-gate * namespace. An AVL tree of all keyed objects in the table (sorted by
1550Sstevel@tonic-gate * key) is used for key lookups. An unordered doubly linked list of
1560Sstevel@tonic-gate * all objects in the namespace (keyed or not) is maintained to
1570Sstevel@tonic-gate * facilitate ID enumeration.
1580Sstevel@tonic-gate *
1590Sstevel@tonic-gate * To help visualize these relationships, here's a picture of a
1600Sstevel@tonic-gate * namespace with a table of size 8 containing three objects
1610Sstevel@tonic-gate * (IPC_SEQ_BITS = 28):
1620Sstevel@tonic-gate *
1630Sstevel@tonic-gate *
1640Sstevel@tonic-gate * +-ipc_service_t--+
1650Sstevel@tonic-gate * | table *---\
1660Sstevel@tonic-gate * | keys *---+----------------------\
1670Sstevel@tonic-gate * | all ids *--\| |
1680Sstevel@tonic-gate * | | || |
1690Sstevel@tonic-gate * +----------------+ || |
1700Sstevel@tonic-gate * || |
1710Sstevel@tonic-gate * /-------------------/| |
1720Sstevel@tonic-gate * | /---------------/ |
1730Sstevel@tonic-gate * | | |
1740Sstevel@tonic-gate * | v |
1750Sstevel@tonic-gate * | +-0------+-1------+-2------+-3------+-4--+---+-5------+-6------+-7------+
1760Sstevel@tonic-gate * | | Seq=3 | | | Seq=1 | : | | | Seq=6 |
1770Sstevel@tonic-gate * | | | | | | : | | | |
1780Sstevel@tonic-gate * | +-*------+--------+--------+-*------+----+---+--------+--------+-*------+
1790Sstevel@tonic-gate * | | | | |
1800Sstevel@tonic-gate * | | /---/ | /----------------/
1810Sstevel@tonic-gate * | | | | |
1820Sstevel@tonic-gate * | v v | v
1830Sstevel@tonic-gate * | +-kipc_perm_t-+ +-kipc_perm_t-+ | +-kipc_perm_t-+
1840Sstevel@tonic-gate * | | id=0x30 | | id=0x13 | | | id=0x67 |
1850Sstevel@tonic-gate * | | key=0xfeed | | key=0xbeef | | | key=0xcafe |
1860Sstevel@tonic-gate * \->| [list] |<------>| [list] |<------>| [list] |
1870Sstevel@tonic-gate * /->| [avl left] x /--->| [avl left] x \--->| [avl left] *---\
1880Sstevel@tonic-gate * | | [avl right] x | | [avl right] x | [avl right] *---+-\
1890Sstevel@tonic-gate * | | | | | | | | | |
1900Sstevel@tonic-gate * | +-------------+ | +-------------+ +-------------+ | |
1910Sstevel@tonic-gate * | \---------------------------------------------/ |
1920Sstevel@tonic-gate * \--------------------------------------------------------------------/
1930Sstevel@tonic-gate *
1940Sstevel@tonic-gate * Locking
1950Sstevel@tonic-gate * -------
1960Sstevel@tonic-gate *
1970Sstevel@tonic-gate * There are three locks (or sets of locks) which are used to ensure
1980Sstevel@tonic-gate * correctness: the slot locks, the namespace lock, and p_lock (needed
1990Sstevel@tonic-gate * when checking resource controls). Their ordering is
2000Sstevel@tonic-gate *
2010Sstevel@tonic-gate * namespace lock -> slot lock 0 -> ... -> slot lock t -> p_lock
2020Sstevel@tonic-gate *
2030Sstevel@tonic-gate * Generally speaking, the namespace lock is used to protect allocation
2040Sstevel@tonic-gate * and removal from the namespace, ID enumeration, and resizing the ID
2050Sstevel@tonic-gate * table. Specifically:
2060Sstevel@tonic-gate *
2070Sstevel@tonic-gate * - write access to all fields of the ipc_service structure
2080Sstevel@tonic-gate * - read access to all variable fields of ipc_service except
2090Sstevel@tonic-gate * ipcs_tabsz (table size) and ipcs_table (the table pointer)
2100Sstevel@tonic-gate * - read/write access to ipc_avl, ipc_list in visible objects'
2110Sstevel@tonic-gate * kipc_perm structures (i.e. objects which have been removed from
2120Sstevel@tonic-gate * the namespace don't have this restriction)
2130Sstevel@tonic-gate * - write access to ipct_seq and ipct_data in the table entries
2140Sstevel@tonic-gate *
2150Sstevel@tonic-gate * A slot lock by itself is meaningless (except when resizing). Of
2160Sstevel@tonic-gate * greater interest conceptually is the notion of an ID lock -- a
2170Sstevel@tonic-gate * "virtual lock" which refers to whichever slot lock an object's ID
2180Sstevel@tonic-gate * currently hashes to.
2190Sstevel@tonic-gate *
2200Sstevel@tonic-gate * An ID lock protects all objects with that ID. Normally there will
2210Sstevel@tonic-gate * only be one such object: the one pointed to by the locked slot.
2220Sstevel@tonic-gate * However, if an object is removed from the namespace but retains
2230Sstevel@tonic-gate * references (e.g. an attached shared memory segment which has been
2240Sstevel@tonic-gate * RMIDed), it continues to use the lock associated with its original
2250Sstevel@tonic-gate * ID. While this can result in increased contention, operations which
2260Sstevel@tonic-gate * require taking the ID lock of removed objects are infrequent.
2270Sstevel@tonic-gate *
2280Sstevel@tonic-gate * Specifically, an ID lock protects the contents of an object's
2290Sstevel@tonic-gate * structure, including the contents of the embedded kipc_perm
2300Sstevel@tonic-gate * structure (but excluding those fields protected by the namespace
2310Sstevel@tonic-gate * lock). It also protects the ipct_seq and ipct_data fields in its
2320Sstevel@tonic-gate * slot (it is really a slot lock, after all).
2330Sstevel@tonic-gate *
2340Sstevel@tonic-gate * Recall that the table is resizable. To avoid requiring every ID
2350Sstevel@tonic-gate * lookup to take a global lock, a scheme much like that employed for
2360Sstevel@tonic-gate * file descriptors (see the comment above UF_ENTER in user.h) is
2370Sstevel@tonic-gate * used. Note that the sequence number and data pointer are protected
2380Sstevel@tonic-gate * by both the namespace lock and their slot lock. When the table is
2390Sstevel@tonic-gate * resized, the following operations take place:
2400Sstevel@tonic-gate *
2410Sstevel@tonic-gate * 1) A new table is allocated.
2420Sstevel@tonic-gate * 2) The global lock is taken.
2430Sstevel@tonic-gate * 3) All old slots are locked, in order.
2440Sstevel@tonic-gate * 4) The first half of the new slots are locked.
2450Sstevel@tonic-gate * 5) All table entries are copied to the new table, and cleared from
2460Sstevel@tonic-gate * the old table.
2470Sstevel@tonic-gate * 6) The ipc_service structure is updated to point to the new table.
2480Sstevel@tonic-gate * 7) The ipc_service structure is updated with the new table size.
2490Sstevel@tonic-gate * 8) All slot locks (old and new) are dropped.
2500Sstevel@tonic-gate *
 * Because the slot locks are embedded in the table, ID lookups and
 * other operations which require taking a slot lock need to verify
 * that the lock taken wasn't part of a stale table. This is
 * accomplished by checking the table size before and after
 * dereferencing the table pointer and taking the lock: if the size
 * changes, the lock must be dropped and reacquired. It is this
 * additional work which distinguishes an ID lock from a slot lock.
2580Sstevel@tonic-gate *
2590Sstevel@tonic-gate * Because we can't guarantee that threads aren't accessing the old
2600Sstevel@tonic-gate * tables' locks, they are never deallocated. To prevent spurious
2610Sstevel@tonic-gate * reports of memory leaks, a pointer to the discarded table is stored
2620Sstevel@tonic-gate * in the new one in step 5. (Theoretically ipcs_destroy will delete
2630Sstevel@tonic-gate * the discarded tables, but it is only ever called from a failed _init
2640Sstevel@tonic-gate * invocation; i.e. when there aren't any.)
2650Sstevel@tonic-gate *
2660Sstevel@tonic-gate * Interfaces
2670Sstevel@tonic-gate * ----------
2680Sstevel@tonic-gate *
2690Sstevel@tonic-gate * The following interfaces are provided by the ipc module for use by
2700Sstevel@tonic-gate * the individual IPC facilities:
2710Sstevel@tonic-gate *
2720Sstevel@tonic-gate * ipcperm_access
2730Sstevel@tonic-gate *
2740Sstevel@tonic-gate * Given an object and a cred structure, determines if the requested
2750Sstevel@tonic-gate * access type is allowed.
2760Sstevel@tonic-gate *
2770Sstevel@tonic-gate * ipcperm_set, ipcperm_stat,
2780Sstevel@tonic-gate * ipcperm_set64, ipcperm_stat64
2790Sstevel@tonic-gate *
 * Performs the common portion of a STAT or SET operation. All
 * (except stat and stat64) can fail, so they should be called before
 * any facility-specific non-reversible changes are made to an
 * object. Similarly, the set operations have side effects, so they
 * should only be called once the possibility of a facility-specific
 * failure is eliminated.
2860Sstevel@tonic-gate *
2870Sstevel@tonic-gate * ipcs_create
2880Sstevel@tonic-gate *
2890Sstevel@tonic-gate * Creates an IPC namespace for use by an IPC facility.
2900Sstevel@tonic-gate *
2910Sstevel@tonic-gate * ipcs_destroy
2920Sstevel@tonic-gate *
2930Sstevel@tonic-gate * Destroys an IPC namespace.
2940Sstevel@tonic-gate *
2950Sstevel@tonic-gate * ipcs_lock, ipcs_unlock
2960Sstevel@tonic-gate *
2970Sstevel@tonic-gate * Takes the namespace lock. Ideally such access wouldn't be
2980Sstevel@tonic-gate * necessary, but there may be facility-specific data protected by
2990Sstevel@tonic-gate * this lock (e.g. project-wide resource consumption).
3000Sstevel@tonic-gate *
3010Sstevel@tonic-gate * ipc_lock
3020Sstevel@tonic-gate *
3030Sstevel@tonic-gate * Takes the lock associated with an ID. Can't fail.
3040Sstevel@tonic-gate *
3050Sstevel@tonic-gate * ipc_relock
3060Sstevel@tonic-gate *
3070Sstevel@tonic-gate * Like ipc_lock, but takes a pointer to a held lock. Drops the lock
3080Sstevel@tonic-gate * unless it is the one that would have been returned by ipc_lock.
3090Sstevel@tonic-gate * Used after calls to cv_wait.
3100Sstevel@tonic-gate *
3110Sstevel@tonic-gate * ipc_lookup
3120Sstevel@tonic-gate *
3130Sstevel@tonic-gate * Performs an ID lookup, returns with the ID lock held. Fails if
3140Sstevel@tonic-gate * the ID doesn't exist in the namespace.
3150Sstevel@tonic-gate *
3160Sstevel@tonic-gate * ipc_hold
3170Sstevel@tonic-gate *
3180Sstevel@tonic-gate * Takes a reference on an object.
3190Sstevel@tonic-gate *
3200Sstevel@tonic-gate * ipc_rele
3210Sstevel@tonic-gate *
3220Sstevel@tonic-gate * Releases a reference on an object, and drops the object's lock.
3230Sstevel@tonic-gate * Calls the object's destructor if last reference is being
3240Sstevel@tonic-gate * released.
3250Sstevel@tonic-gate *
3260Sstevel@tonic-gate * ipc_rele_locked
3270Sstevel@tonic-gate *
3280Sstevel@tonic-gate * Releases a reference on an object. Doesn't drop lock, and may
3290Sstevel@tonic-gate * only be called when there is more than one reference to the
3300Sstevel@tonic-gate * object.
3310Sstevel@tonic-gate *
3320Sstevel@tonic-gate * ipc_get, ipc_commit_begin, ipc_commit_end, ipc_cleanup
3330Sstevel@tonic-gate *
3340Sstevel@tonic-gate * Components of a GET operation. ipc_get performs a key lookup,
3350Sstevel@tonic-gate * allocating an object if the key isn't found (returning with the
3360Sstevel@tonic-gate * namespace lock and p_lock held), and returning the existing object
3370Sstevel@tonic-gate * if it is (with the object lock held). ipc_get doesn't modify the
3380Sstevel@tonic-gate * namespace.
3390Sstevel@tonic-gate *
3400Sstevel@tonic-gate * ipc_commit_begin begins the process of inserting an object
3410Sstevel@tonic-gate * allocated by ipc_get into the namespace, and can fail. If
3420Sstevel@tonic-gate * successful, it returns with the namespace lock and p_lock held.
3430Sstevel@tonic-gate * ipc_commit_end completes the process of inserting an object into
3440Sstevel@tonic-gate * the namespace and can't fail. The facility can call ipc_cleanup
3450Sstevel@tonic-gate * at any time following a successful ipc_get and before
3460Sstevel@tonic-gate * ipc_commit_end or a failed ipc_commit_begin to fail the
3470Sstevel@tonic-gate * allocation. Pseudocode for the suggested GET implementation:
3480Sstevel@tonic-gate *
3490Sstevel@tonic-gate * top:
3500Sstevel@tonic-gate *
3510Sstevel@tonic-gate * ipc_get
3520Sstevel@tonic-gate *
3530Sstevel@tonic-gate * if failure
3540Sstevel@tonic-gate * return
3550Sstevel@tonic-gate *
3560Sstevel@tonic-gate * if found {
3570Sstevel@tonic-gate *
3580Sstevel@tonic-gate * if object meets criteria
3590Sstevel@tonic-gate * unlock object and return success
3600Sstevel@tonic-gate * else
3610Sstevel@tonic-gate * unlock object and return failure
3620Sstevel@tonic-gate *
3630Sstevel@tonic-gate * } else {
3640Sstevel@tonic-gate *
3650Sstevel@tonic-gate * perform resource control tests
3660Sstevel@tonic-gate * drop namespace lock, p_lock
3670Sstevel@tonic-gate * if failure
3680Sstevel@tonic-gate * ipc_cleanup
3690Sstevel@tonic-gate *
3700Sstevel@tonic-gate * perform facility-specific initialization
3710Sstevel@tonic-gate * if failure {
3720Sstevel@tonic-gate * facility-specific cleanup
3730Sstevel@tonic-gate * ipc_cleanup
3740Sstevel@tonic-gate * }
3750Sstevel@tonic-gate *
3760Sstevel@tonic-gate * ( At this point the object should be destructible using the
3770Sstevel@tonic-gate * destructor given to ipcs_create )
3780Sstevel@tonic-gate *
3790Sstevel@tonic-gate * ipc_commit_begin
3800Sstevel@tonic-gate * if retry
3810Sstevel@tonic-gate * goto top
3820Sstevel@tonic-gate * else if failure
3830Sstevel@tonic-gate * return
3840Sstevel@tonic-gate *
3850Sstevel@tonic-gate * perform facility-specific resource control tests/allocations
3860Sstevel@tonic-gate * if failure
3870Sstevel@tonic-gate * ipc_cleanup
3880Sstevel@tonic-gate *
3890Sstevel@tonic-gate * ipc_commit_end
3900Sstevel@tonic-gate * perform any infallible post-creation actions, unlock, and return
3910Sstevel@tonic-gate *
3920Sstevel@tonic-gate * }
3930Sstevel@tonic-gate *
3940Sstevel@tonic-gate * ipc_rmid
3950Sstevel@tonic-gate *
 * Performs the common portion of an RMID operation -- looks up an ID,
 * removes it, and calls a facility-specific function to do
 * RMID-time cleanup on the private portions of the object.
3990Sstevel@tonic-gate *
4000Sstevel@tonic-gate * ipc_ids
4010Sstevel@tonic-gate *
4020Sstevel@tonic-gate * Performs the common portion of an IDS operation.
4030Sstevel@tonic-gate *
4040Sstevel@tonic-gate */
4050Sstevel@tonic-gate
4060Sstevel@tonic-gate #include <sys/types.h>
4070Sstevel@tonic-gate #include <sys/param.h>
4080Sstevel@tonic-gate #include <sys/cred.h>
4090Sstevel@tonic-gate #include <sys/policy.h>
4100Sstevel@tonic-gate #include <sys/proc.h>
4110Sstevel@tonic-gate #include <sys/user.h>
4120Sstevel@tonic-gate #include <sys/ipc.h>
4130Sstevel@tonic-gate #include <sys/ipc_impl.h>
4140Sstevel@tonic-gate #include <sys/errno.h>
4150Sstevel@tonic-gate #include <sys/systm.h>
4160Sstevel@tonic-gate #include <sys/list.h>
4170Sstevel@tonic-gate #include <sys/atomic.h>
4180Sstevel@tonic-gate #include <sys/zone.h>
4190Sstevel@tonic-gate #include <sys/task.h>
4200Sstevel@tonic-gate #include <sys/modctl.h>
4210Sstevel@tonic-gate
4220Sstevel@tonic-gate #include <c2/audit.h>
4230Sstevel@tonic-gate
4240Sstevel@tonic-gate static struct modlmisc modlmisc = {
4250Sstevel@tonic-gate &mod_miscops,
4260Sstevel@tonic-gate "common ipc code",
4270Sstevel@tonic-gate };
4280Sstevel@tonic-gate
4290Sstevel@tonic-gate static struct modlinkage modlinkage = {
4300Sstevel@tonic-gate MODREV_1, (void *)&modlmisc, NULL
4310Sstevel@tonic-gate };
4320Sstevel@tonic-gate
4330Sstevel@tonic-gate
4340Sstevel@tonic-gate int
_init(void)4350Sstevel@tonic-gate _init(void)
4360Sstevel@tonic-gate {
4370Sstevel@tonic-gate return (mod_install(&modlinkage));
4380Sstevel@tonic-gate }
4390Sstevel@tonic-gate
4400Sstevel@tonic-gate int
_fini(void)4410Sstevel@tonic-gate _fini(void)
4420Sstevel@tonic-gate {
4430Sstevel@tonic-gate return (mod_remove(&modlinkage));
4440Sstevel@tonic-gate }
4450Sstevel@tonic-gate
4460Sstevel@tonic-gate int
_info(struct modinfo * modinfop)4470Sstevel@tonic-gate _info(struct modinfo *modinfop)
4480Sstevel@tonic-gate {
4490Sstevel@tonic-gate return (mod_info(&modlinkage, modinfop));
4500Sstevel@tonic-gate }
4510Sstevel@tonic-gate
4520Sstevel@tonic-gate
4530Sstevel@tonic-gate /*
4540Sstevel@tonic-gate * Check message, semaphore, or shared memory access permissions.
4550Sstevel@tonic-gate *
4560Sstevel@tonic-gate * This routine verifies the requested access permission for the current
4570Sstevel@tonic-gate * process. The zone ids are compared, and the appropriate bits are
4580Sstevel@tonic-gate * checked corresponding to owner, group (including the list of
4590Sstevel@tonic-gate * supplementary groups), or everyone. Zero is returned on success.
4600Sstevel@tonic-gate * On failure, the security policy is asked to check to override the
4610Sstevel@tonic-gate * permissions check; the policy will either return 0 for access granted
4620Sstevel@tonic-gate * or EACCES.
4630Sstevel@tonic-gate *
4640Sstevel@tonic-gate * Access to objects in other zones requires that the caller be in the
4650Sstevel@tonic-gate * global zone and have the appropriate IPC_DAC_* privilege, regardless
4660Sstevel@tonic-gate * of whether the uid or gid match those of the object. Note that
4670Sstevel@tonic-gate * cross-zone accesses will normally never get here since they'll
4680Sstevel@tonic-gate * fail in ipc_lookup or ipc_get.
4690Sstevel@tonic-gate *
4700Sstevel@tonic-gate * The arguments must be set up as follows:
4710Sstevel@tonic-gate * p - Pointer to permission structure to verify
4720Sstevel@tonic-gate * mode - Desired access permissions
4730Sstevel@tonic-gate */
4740Sstevel@tonic-gate int
ipcperm_access(kipc_perm_t * p,int mode,cred_t * cr)4750Sstevel@tonic-gate ipcperm_access(kipc_perm_t *p, int mode, cred_t *cr)
4760Sstevel@tonic-gate {
4770Sstevel@tonic-gate int shifts = 0;
4780Sstevel@tonic-gate uid_t uid = crgetuid(cr);
4790Sstevel@tonic-gate zoneid_t zoneid = getzoneid();
4800Sstevel@tonic-gate
4810Sstevel@tonic-gate if (p->ipc_zoneid == zoneid) {
4820Sstevel@tonic-gate if (uid != p->ipc_uid && uid != p->ipc_cuid) {
4830Sstevel@tonic-gate shifts += 3;
4840Sstevel@tonic-gate if (!groupmember(p->ipc_gid, cr) &&
4850Sstevel@tonic-gate !groupmember(p->ipc_cgid, cr))
4860Sstevel@tonic-gate shifts += 3;
4870Sstevel@tonic-gate }
4880Sstevel@tonic-gate
4890Sstevel@tonic-gate mode &= ~(p->ipc_mode << shifts);
4900Sstevel@tonic-gate
4910Sstevel@tonic-gate if (mode == 0)
4920Sstevel@tonic-gate return (0);
4930Sstevel@tonic-gate } else if (zoneid != GLOBAL_ZONEID)
4940Sstevel@tonic-gate return (EACCES);
4950Sstevel@tonic-gate
4960Sstevel@tonic-gate return (secpolicy_ipc_access(cr, p, mode));
4970Sstevel@tonic-gate }
4980Sstevel@tonic-gate
/*
 * There are two versions of the ipcperm_set/stat functions:
 *	ipcperm_???   - for use with IPC_SET/STAT
 *	ipcperm_???64 - for use with IPC_SET64/STAT64
 *
 * These functions encapsulate the common portions (copying, permission
 * checks, and auditing) of the set/stat operations. All, except for
 * stat and stat64 which are void, return 0 on success or a non-zero
 * errno value on error.
 */
5090Sstevel@tonic-gate
5100Sstevel@tonic-gate int
ipcperm_set(ipc_service_t * service,struct cred * cr,kipc_perm_t * kperm,struct ipc_perm * perm,model_t model)5110Sstevel@tonic-gate ipcperm_set(ipc_service_t *service, struct cred *cr,
5120Sstevel@tonic-gate kipc_perm_t *kperm, struct ipc_perm *perm, model_t model)
5130Sstevel@tonic-gate {
5140Sstevel@tonic-gate STRUCT_HANDLE(ipc_perm, lperm);
5150Sstevel@tonic-gate uid_t uid;
5160Sstevel@tonic-gate gid_t gid;
5170Sstevel@tonic-gate mode_t mode;
5185771Sjp151216 zone_t *zone;
5190Sstevel@tonic-gate
5200Sstevel@tonic-gate ASSERT(IPC_LOCKED(service, kperm));
5210Sstevel@tonic-gate
5220Sstevel@tonic-gate STRUCT_SET_HANDLE(lperm, model, perm);
5230Sstevel@tonic-gate uid = STRUCT_FGET(lperm, uid);
5240Sstevel@tonic-gate gid = STRUCT_FGET(lperm, gid);
5250Sstevel@tonic-gate mode = STRUCT_FGET(lperm, mode);
5260Sstevel@tonic-gate
5270Sstevel@tonic-gate if (secpolicy_ipc_owner(cr, kperm) != 0)
5280Sstevel@tonic-gate return (EPERM);
5290Sstevel@tonic-gate
5305771Sjp151216 zone = crgetzone(cr);
5315771Sjp151216 if (!VALID_UID(uid, zone) || !VALID_GID(gid, zone))
5320Sstevel@tonic-gate return (EINVAL);
5330Sstevel@tonic-gate
5340Sstevel@tonic-gate kperm->ipc_uid = uid;
5350Sstevel@tonic-gate kperm->ipc_gid = gid;
5360Sstevel@tonic-gate kperm->ipc_mode = (mode & 0777) | (kperm->ipc_mode & ~0777);
5370Sstevel@tonic-gate
53811861SMarek.Pospisil@Sun.COM if (AU_AUDITING())
5390Sstevel@tonic-gate audit_ipcget(service->ipcs_atype, kperm);
5400Sstevel@tonic-gate
5410Sstevel@tonic-gate return (0);
5420Sstevel@tonic-gate }
5430Sstevel@tonic-gate
5440Sstevel@tonic-gate void
ipcperm_stat(struct ipc_perm * perm,kipc_perm_t * kperm,model_t model)5450Sstevel@tonic-gate ipcperm_stat(struct ipc_perm *perm, kipc_perm_t *kperm, model_t model)
5460Sstevel@tonic-gate {
5470Sstevel@tonic-gate STRUCT_HANDLE(ipc_perm, lperm);
5480Sstevel@tonic-gate
5490Sstevel@tonic-gate STRUCT_SET_HANDLE(lperm, model, perm);
5500Sstevel@tonic-gate STRUCT_FSET(lperm, uid, kperm->ipc_uid);
5510Sstevel@tonic-gate STRUCT_FSET(lperm, gid, kperm->ipc_gid);
5520Sstevel@tonic-gate STRUCT_FSET(lperm, cuid, kperm->ipc_cuid);
5530Sstevel@tonic-gate STRUCT_FSET(lperm, cgid, kperm->ipc_cgid);
5540Sstevel@tonic-gate STRUCT_FSET(lperm, mode, kperm->ipc_mode);
5550Sstevel@tonic-gate STRUCT_FSET(lperm, seq, 0);
5560Sstevel@tonic-gate STRUCT_FSET(lperm, key, kperm->ipc_key);
5570Sstevel@tonic-gate }
5580Sstevel@tonic-gate
5590Sstevel@tonic-gate int
ipcperm_set64(ipc_service_t * service,struct cred * cr,kipc_perm_t * kperm,ipc_perm64_t * perm64)5600Sstevel@tonic-gate ipcperm_set64(ipc_service_t *service, struct cred *cr,
5610Sstevel@tonic-gate kipc_perm_t *kperm, ipc_perm64_t *perm64)
5620Sstevel@tonic-gate {
5635771Sjp151216 zone_t *zone;
5645771Sjp151216
5650Sstevel@tonic-gate ASSERT(IPC_LOCKED(service, kperm));
5660Sstevel@tonic-gate
5670Sstevel@tonic-gate if (secpolicy_ipc_owner(cr, kperm) != 0)
5680Sstevel@tonic-gate return (EPERM);
5690Sstevel@tonic-gate
5705771Sjp151216 zone = crgetzone(cr);
5715771Sjp151216 if (!VALID_UID(perm64->ipcx_uid, zone) ||
5725771Sjp151216 !VALID_GID(perm64->ipcx_gid, zone))
5730Sstevel@tonic-gate return (EINVAL);
5740Sstevel@tonic-gate
5750Sstevel@tonic-gate kperm->ipc_uid = perm64->ipcx_uid;
5760Sstevel@tonic-gate kperm->ipc_gid = perm64->ipcx_gid;
5770Sstevel@tonic-gate kperm->ipc_mode = (perm64->ipcx_mode & 0777) |
5780Sstevel@tonic-gate (kperm->ipc_mode & ~0777);
5790Sstevel@tonic-gate
58011861SMarek.Pospisil@Sun.COM if (AU_AUDITING())
5810Sstevel@tonic-gate audit_ipcget(service->ipcs_atype, kperm);
5820Sstevel@tonic-gate
5830Sstevel@tonic-gate return (0);
5840Sstevel@tonic-gate }
5850Sstevel@tonic-gate
5860Sstevel@tonic-gate void
ipcperm_stat64(ipc_perm64_t * perm64,kipc_perm_t * kperm)5870Sstevel@tonic-gate ipcperm_stat64(ipc_perm64_t *perm64, kipc_perm_t *kperm)
5880Sstevel@tonic-gate {
5890Sstevel@tonic-gate perm64->ipcx_uid = kperm->ipc_uid;
5900Sstevel@tonic-gate perm64->ipcx_gid = kperm->ipc_gid;
5910Sstevel@tonic-gate perm64->ipcx_cuid = kperm->ipc_cuid;
5920Sstevel@tonic-gate perm64->ipcx_cgid = kperm->ipc_cgid;
5930Sstevel@tonic-gate perm64->ipcx_mode = kperm->ipc_mode;
5940Sstevel@tonic-gate perm64->ipcx_key = kperm->ipc_key;
5950Sstevel@tonic-gate perm64->ipcx_projid = kperm->ipc_proj->kpj_id;
5960Sstevel@tonic-gate perm64->ipcx_zoneid = kperm->ipc_zoneid;
5970Sstevel@tonic-gate }
5980Sstevel@tonic-gate
5990Sstevel@tonic-gate
6000Sstevel@tonic-gate /*
6010Sstevel@tonic-gate * ipc key comparator.
6020Sstevel@tonic-gate */
6030Sstevel@tonic-gate static int
ipc_key_compar(const void * a,const void * b)6040Sstevel@tonic-gate ipc_key_compar(const void *a, const void *b)
6050Sstevel@tonic-gate {
6060Sstevel@tonic-gate kipc_perm_t *aperm = (kipc_perm_t *)a;
6070Sstevel@tonic-gate kipc_perm_t *bperm = (kipc_perm_t *)b;
6080Sstevel@tonic-gate int ak = aperm->ipc_key;
6090Sstevel@tonic-gate int bk = bperm->ipc_key;
6100Sstevel@tonic-gate zoneid_t az;
6110Sstevel@tonic-gate zoneid_t bz;
6120Sstevel@tonic-gate
6130Sstevel@tonic-gate ASSERT(ak != IPC_PRIVATE);
6140Sstevel@tonic-gate ASSERT(bk != IPC_PRIVATE);
6150Sstevel@tonic-gate
6160Sstevel@tonic-gate /*
6170Sstevel@tonic-gate * Compare key first, then zoneid. This optimizes performance for
6180Sstevel@tonic-gate * systems with only one zone, since the zone checks will only be
6190Sstevel@tonic-gate * made when the keys match.
6200Sstevel@tonic-gate */
6210Sstevel@tonic-gate if (ak < bk)
6220Sstevel@tonic-gate return (-1);
6230Sstevel@tonic-gate if (ak > bk)
6240Sstevel@tonic-gate return (1);
6250Sstevel@tonic-gate
6260Sstevel@tonic-gate /* keys match */
6270Sstevel@tonic-gate az = aperm->ipc_zoneid;
6280Sstevel@tonic-gate bz = bperm->ipc_zoneid;
6290Sstevel@tonic-gate if (az < bz)
6300Sstevel@tonic-gate return (-1);
6310Sstevel@tonic-gate if (az > bz)
6320Sstevel@tonic-gate return (1);
6330Sstevel@tonic-gate return (0);
6340Sstevel@tonic-gate }
6350Sstevel@tonic-gate
6360Sstevel@tonic-gate /*
6370Sstevel@tonic-gate * Create an ipc service.
6380Sstevel@tonic-gate */
6390Sstevel@tonic-gate ipc_service_t *
ipcs_create(const char * name,rctl_hndl_t proj_rctl,rctl_hndl_t zone_rctl,size_t size,ipc_func_t * dtor,ipc_func_t * rmid,int audit_type,size_t rctl_offset)6402677Sml93401 ipcs_create(const char *name, rctl_hndl_t proj_rctl, rctl_hndl_t zone_rctl,
6412677Sml93401 size_t size, ipc_func_t *dtor, ipc_func_t *rmid, int audit_type,
6422677Sml93401 size_t rctl_offset)
6430Sstevel@tonic-gate {
6440Sstevel@tonic-gate ipc_service_t *result;
6450Sstevel@tonic-gate
6460Sstevel@tonic-gate result = kmem_alloc(sizeof (ipc_service_t), KM_SLEEP);
6470Sstevel@tonic-gate
6480Sstevel@tonic-gate mutex_init(&result->ipcs_lock, NULL, MUTEX_ADAPTIVE, NULL);
6490Sstevel@tonic-gate result->ipcs_count = 0;
6500Sstevel@tonic-gate avl_create(&result->ipcs_keys, ipc_key_compar, size, 0);
6510Sstevel@tonic-gate result->ipcs_tabsz = IPC_IDS_MIN;
6520Sstevel@tonic-gate result->ipcs_table =
6530Sstevel@tonic-gate kmem_zalloc(IPC_IDS_MIN * sizeof (ipc_slot_t), KM_SLEEP);
6540Sstevel@tonic-gate result->ipcs_ssize = size;
6550Sstevel@tonic-gate result->ipcs_ids = id_space_create(name, 0, IPC_IDS_MIN);
6560Sstevel@tonic-gate result->ipcs_dtor = dtor;
6570Sstevel@tonic-gate result->ipcs_rmid = rmid;
6582677Sml93401 result->ipcs_proj_rctl = proj_rctl;
6592677Sml93401 result->ipcs_zone_rctl = zone_rctl;
6600Sstevel@tonic-gate result->ipcs_atype = audit_type;
6612677Sml93401 ASSERT(rctl_offset < sizeof (ipc_rqty_t));
6620Sstevel@tonic-gate result->ipcs_rctlofs = rctl_offset;
6630Sstevel@tonic-gate list_create(&result->ipcs_usedids, sizeof (kipc_perm_t),
6640Sstevel@tonic-gate offsetof(kipc_perm_t, ipc_list));
6650Sstevel@tonic-gate
6660Sstevel@tonic-gate return (result);
6670Sstevel@tonic-gate }
6680Sstevel@tonic-gate
6690Sstevel@tonic-gate /*
6700Sstevel@tonic-gate * Destroy an ipc service.
6710Sstevel@tonic-gate */
6720Sstevel@tonic-gate void
ipcs_destroy(ipc_service_t * service)6730Sstevel@tonic-gate ipcs_destroy(ipc_service_t *service)
6740Sstevel@tonic-gate {
6750Sstevel@tonic-gate ipc_slot_t *slot, *next;
6760Sstevel@tonic-gate
6770Sstevel@tonic-gate mutex_enter(&service->ipcs_lock);
6780Sstevel@tonic-gate
6790Sstevel@tonic-gate ASSERT(service->ipcs_count == 0);
6800Sstevel@tonic-gate avl_destroy(&service->ipcs_keys);
6810Sstevel@tonic-gate list_destroy(&service->ipcs_usedids);
6820Sstevel@tonic-gate id_space_destroy(service->ipcs_ids);
6830Sstevel@tonic-gate
6840Sstevel@tonic-gate for (slot = service->ipcs_table; slot; slot = next) {
6850Sstevel@tonic-gate next = slot[0].ipct_chain;
6860Sstevel@tonic-gate kmem_free(slot, service->ipcs_tabsz * sizeof (ipc_slot_t));
6870Sstevel@tonic-gate service->ipcs_tabsz >>= 1;
6880Sstevel@tonic-gate }
6890Sstevel@tonic-gate
6900Sstevel@tonic-gate mutex_destroy(&service->ipcs_lock);
6910Sstevel@tonic-gate kmem_free(service, sizeof (ipc_service_t));
6920Sstevel@tonic-gate }
6930Sstevel@tonic-gate
6940Sstevel@tonic-gate /*
6950Sstevel@tonic-gate * Takes the service lock.
6960Sstevel@tonic-gate */
6970Sstevel@tonic-gate void
ipcs_lock(ipc_service_t * service)6980Sstevel@tonic-gate ipcs_lock(ipc_service_t *service)
6990Sstevel@tonic-gate {
7000Sstevel@tonic-gate mutex_enter(&service->ipcs_lock);
7010Sstevel@tonic-gate }
7020Sstevel@tonic-gate
7030Sstevel@tonic-gate /*
7040Sstevel@tonic-gate * Releases the service lock.
7050Sstevel@tonic-gate */
7060Sstevel@tonic-gate void
ipcs_unlock(ipc_service_t * service)7070Sstevel@tonic-gate ipcs_unlock(ipc_service_t *service)
7080Sstevel@tonic-gate {
7090Sstevel@tonic-gate mutex_exit(&service->ipcs_lock);
7100Sstevel@tonic-gate }
7110Sstevel@tonic-gate
7120Sstevel@tonic-gate
7130Sstevel@tonic-gate /*
7140Sstevel@tonic-gate * Locks the specified ID. Returns the ID's ID table index.
7150Sstevel@tonic-gate */
7160Sstevel@tonic-gate static int
ipc_lock_internal(ipc_service_t * service,uint_t id)7170Sstevel@tonic-gate ipc_lock_internal(ipc_service_t *service, uint_t id)
7180Sstevel@tonic-gate {
7190Sstevel@tonic-gate uint_t tabsz;
7200Sstevel@tonic-gate uint_t index;
7210Sstevel@tonic-gate kmutex_t *mutex;
7220Sstevel@tonic-gate
7230Sstevel@tonic-gate for (;;) {
7240Sstevel@tonic-gate tabsz = service->ipcs_tabsz;
7250Sstevel@tonic-gate membar_consumer();
7260Sstevel@tonic-gate index = id & (tabsz - 1);
7270Sstevel@tonic-gate mutex = &service->ipcs_table[index].ipct_lock;
7280Sstevel@tonic-gate mutex_enter(mutex);
7290Sstevel@tonic-gate if (tabsz == service->ipcs_tabsz)
7300Sstevel@tonic-gate break;
7310Sstevel@tonic-gate mutex_exit(mutex);
7320Sstevel@tonic-gate }
7330Sstevel@tonic-gate
7340Sstevel@tonic-gate return (index);
7350Sstevel@tonic-gate }
7360Sstevel@tonic-gate
7370Sstevel@tonic-gate /*
7380Sstevel@tonic-gate * Locks the specified ID. Returns a pointer to the ID's lock.
7390Sstevel@tonic-gate */
7400Sstevel@tonic-gate kmutex_t *
ipc_lock(ipc_service_t * service,int id)7410Sstevel@tonic-gate ipc_lock(ipc_service_t *service, int id)
7420Sstevel@tonic-gate {
7430Sstevel@tonic-gate uint_t index;
7440Sstevel@tonic-gate
7450Sstevel@tonic-gate /*
7460Sstevel@tonic-gate * These assertions don't reflect requirements of the code
7470Sstevel@tonic-gate * which follows, but they should never fail nonetheless.
7480Sstevel@tonic-gate */
7490Sstevel@tonic-gate ASSERT(id >= 0);
7500Sstevel@tonic-gate ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
7510Sstevel@tonic-gate index = ipc_lock_internal(service, id);
7520Sstevel@tonic-gate
7530Sstevel@tonic-gate return (&service->ipcs_table[index].ipct_lock);
7540Sstevel@tonic-gate }
7550Sstevel@tonic-gate
7560Sstevel@tonic-gate /*
7570Sstevel@tonic-gate * Checks to see if the held lock provided is the current lock for the
7580Sstevel@tonic-gate * specified id. If so, we return it instead of dropping it and
7590Sstevel@tonic-gate * returning the result of ipc_lock. This is intended to speed up cv
7600Sstevel@tonic-gate * wakeups where we are left holding a lock which could be stale, but
7610Sstevel@tonic-gate * probably isn't.
7620Sstevel@tonic-gate */
7630Sstevel@tonic-gate kmutex_t *
ipc_relock(ipc_service_t * service,int id,kmutex_t * lock)7640Sstevel@tonic-gate ipc_relock(ipc_service_t *service, int id, kmutex_t *lock)
7650Sstevel@tonic-gate {
7660Sstevel@tonic-gate ASSERT(id >= 0);
7670Sstevel@tonic-gate ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
7680Sstevel@tonic-gate ASSERT(MUTEX_HELD(lock));
7690Sstevel@tonic-gate
7700Sstevel@tonic-gate if (&service->ipcs_table[IPC_INDEX(id)].ipct_lock == lock)
7710Sstevel@tonic-gate return (lock);
7720Sstevel@tonic-gate
7730Sstevel@tonic-gate mutex_exit(lock);
7740Sstevel@tonic-gate return (ipc_lock(service, id));
7750Sstevel@tonic-gate }
7760Sstevel@tonic-gate
7770Sstevel@tonic-gate /*
7780Sstevel@tonic-gate * Performs an ID lookup. If the ID doesn't exist or has been removed,
7790Sstevel@tonic-gate * or isn't visible to the caller (because of zones), NULL is returned.
7800Sstevel@tonic-gate * Otherwise, a pointer to the ID's perm structure and held ID lock are
7810Sstevel@tonic-gate * returned.
7820Sstevel@tonic-gate */
7830Sstevel@tonic-gate kmutex_t *
ipc_lookup(ipc_service_t * service,int id,kipc_perm_t ** perm)7840Sstevel@tonic-gate ipc_lookup(ipc_service_t *service, int id, kipc_perm_t **perm)
7850Sstevel@tonic-gate {
7860Sstevel@tonic-gate kipc_perm_t *result;
7870Sstevel@tonic-gate uint_t index;
7880Sstevel@tonic-gate
7890Sstevel@tonic-gate /*
7900Sstevel@tonic-gate * There is no need to check to see if id is in-range (i.e.
7910Sstevel@tonic-gate * positive and fits into the table). If it is out-of-range,
7920Sstevel@tonic-gate * the id simply won't match the object's.
7930Sstevel@tonic-gate */
7940Sstevel@tonic-gate
7950Sstevel@tonic-gate index = ipc_lock_internal(service, id);
7960Sstevel@tonic-gate result = service->ipcs_table[index].ipct_data;
7970Sstevel@tonic-gate if (result == NULL || result->ipc_id != (uint_t)id ||
7980Sstevel@tonic-gate !HASZONEACCESS(curproc, result->ipc_zoneid)) {
7990Sstevel@tonic-gate mutex_exit(&service->ipcs_table[index].ipct_lock);
8000Sstevel@tonic-gate return (NULL);
8010Sstevel@tonic-gate }
8020Sstevel@tonic-gate
8030Sstevel@tonic-gate ASSERT(IPC_SEQ(id) == service->ipcs_table[index].ipct_seq);
8040Sstevel@tonic-gate
8050Sstevel@tonic-gate *perm = result;
80611861SMarek.Pospisil@Sun.COM if (AU_AUDITING())
8070Sstevel@tonic-gate audit_ipc(service->ipcs_atype, id, result);
8080Sstevel@tonic-gate
8090Sstevel@tonic-gate return (&service->ipcs_table[index].ipct_lock);
8100Sstevel@tonic-gate }
8110Sstevel@tonic-gate
8120Sstevel@tonic-gate /*
8130Sstevel@tonic-gate * Increase the reference count on an ID.
8140Sstevel@tonic-gate */
8150Sstevel@tonic-gate /*ARGSUSED*/
8160Sstevel@tonic-gate void
ipc_hold(ipc_service_t * s,kipc_perm_t * perm)8170Sstevel@tonic-gate ipc_hold(ipc_service_t *s, kipc_perm_t *perm)
8180Sstevel@tonic-gate {
8190Sstevel@tonic-gate ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
8200Sstevel@tonic-gate ASSERT(IPC_LOCKED(s, perm));
8210Sstevel@tonic-gate perm->ipc_ref++;
8220Sstevel@tonic-gate }
8230Sstevel@tonic-gate
8240Sstevel@tonic-gate /*
8250Sstevel@tonic-gate * Decrease the reference count on an ID and drops the ID's lock.
8260Sstevel@tonic-gate * Destroys the ID if the new reference count is zero.
8270Sstevel@tonic-gate */
8280Sstevel@tonic-gate void
ipc_rele(ipc_service_t * s,kipc_perm_t * perm)8290Sstevel@tonic-gate ipc_rele(ipc_service_t *s, kipc_perm_t *perm)
8300Sstevel@tonic-gate {
8310Sstevel@tonic-gate int nref;
8320Sstevel@tonic-gate
8330Sstevel@tonic-gate ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
8340Sstevel@tonic-gate ASSERT(IPC_LOCKED(s, perm));
8350Sstevel@tonic-gate ASSERT(perm->ipc_ref > 0);
8360Sstevel@tonic-gate
8370Sstevel@tonic-gate nref = --perm->ipc_ref;
8380Sstevel@tonic-gate mutex_exit(&s->ipcs_table[IPC_INDEX(perm->ipc_id)].ipct_lock);
8390Sstevel@tonic-gate
8400Sstevel@tonic-gate if (nref == 0) {
8410Sstevel@tonic-gate ASSERT(IPC_FREE(perm)); /* ipc_rmid clears IPC_ALLOC */
8420Sstevel@tonic-gate s->ipcs_dtor(perm);
8430Sstevel@tonic-gate project_rele(perm->ipc_proj);
844*13096SJordan.Vaughan@Sun.com zone_rele_ref(&perm->ipc_zone_ref, ZONE_REF_IPC);
8450Sstevel@tonic-gate kmem_free(perm, s->ipcs_ssize);
8460Sstevel@tonic-gate }
8470Sstevel@tonic-gate }
8480Sstevel@tonic-gate
8490Sstevel@tonic-gate /*
8500Sstevel@tonic-gate * Decrease the reference count on an ID, but don't drop the ID lock.
8510Sstevel@tonic-gate * Used in cases where one thread needs to remove many references (on
8520Sstevel@tonic-gate * behalf of other parties).
8530Sstevel@tonic-gate */
8540Sstevel@tonic-gate void
ipc_rele_locked(ipc_service_t * s,kipc_perm_t * perm)8550Sstevel@tonic-gate ipc_rele_locked(ipc_service_t *s, kipc_perm_t *perm)
8560Sstevel@tonic-gate {
8570Sstevel@tonic-gate ASSERT(perm->ipc_ref > 1);
8580Sstevel@tonic-gate ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
8590Sstevel@tonic-gate ASSERT(IPC_LOCKED(s, perm));
8600Sstevel@tonic-gate
8610Sstevel@tonic-gate perm->ipc_ref--;
8620Sstevel@tonic-gate }
8630Sstevel@tonic-gate
8640Sstevel@tonic-gate
8650Sstevel@tonic-gate /*
8660Sstevel@tonic-gate * Internal function to grow the service ID table.
8670Sstevel@tonic-gate */
8680Sstevel@tonic-gate static int
ipc_grow(ipc_service_t * service)8690Sstevel@tonic-gate ipc_grow(ipc_service_t *service)
8700Sstevel@tonic-gate {
8710Sstevel@tonic-gate ipc_slot_t *new, *old;
8720Sstevel@tonic-gate int i, oldsize, newsize;
8730Sstevel@tonic-gate
8740Sstevel@tonic-gate ASSERT(MUTEX_HELD(&service->ipcs_lock));
8750Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
8760Sstevel@tonic-gate
8770Sstevel@tonic-gate if (service->ipcs_tabsz == IPC_IDS_MAX)
8780Sstevel@tonic-gate return (ENOSPC);
8790Sstevel@tonic-gate
8800Sstevel@tonic-gate oldsize = service->ipcs_tabsz;
8810Sstevel@tonic-gate newsize = oldsize << 1;
8820Sstevel@tonic-gate new = kmem_zalloc(newsize * sizeof (ipc_slot_t), KM_NOSLEEP);
8830Sstevel@tonic-gate if (new == NULL)
8840Sstevel@tonic-gate return (ENOSPC);
8850Sstevel@tonic-gate
8860Sstevel@tonic-gate old = service->ipcs_table;
8870Sstevel@tonic-gate for (i = 0; i < oldsize; i++) {
8880Sstevel@tonic-gate mutex_enter(&old[i].ipct_lock);
8890Sstevel@tonic-gate mutex_enter(&new[i].ipct_lock);
8900Sstevel@tonic-gate
8910Sstevel@tonic-gate new[i].ipct_seq = old[i].ipct_seq;
8920Sstevel@tonic-gate new[i].ipct_data = old[i].ipct_data;
8930Sstevel@tonic-gate old[i].ipct_data = NULL;
8940Sstevel@tonic-gate }
8950Sstevel@tonic-gate
8960Sstevel@tonic-gate new[0].ipct_chain = old;
8970Sstevel@tonic-gate service->ipcs_table = new;
8980Sstevel@tonic-gate membar_producer();
8990Sstevel@tonic-gate service->ipcs_tabsz = newsize;
9000Sstevel@tonic-gate
9010Sstevel@tonic-gate for (i = 0; i < oldsize; i++) {
9020Sstevel@tonic-gate mutex_exit(&old[i].ipct_lock);
9030Sstevel@tonic-gate mutex_exit(&new[i].ipct_lock);
9040Sstevel@tonic-gate }
9050Sstevel@tonic-gate
9060Sstevel@tonic-gate id_space_extend(service->ipcs_ids, oldsize, service->ipcs_tabsz);
9070Sstevel@tonic-gate
9080Sstevel@tonic-gate return (0);
9090Sstevel@tonic-gate }
9100Sstevel@tonic-gate
9110Sstevel@tonic-gate
9120Sstevel@tonic-gate static int
ipc_keylookup(ipc_service_t * service,key_t key,int flag,kipc_perm_t ** permp)9130Sstevel@tonic-gate ipc_keylookup(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp)
9140Sstevel@tonic-gate {
9150Sstevel@tonic-gate kipc_perm_t *perm = NULL;
9160Sstevel@tonic-gate avl_index_t where;
9170Sstevel@tonic-gate kipc_perm_t template;
9180Sstevel@tonic-gate
9190Sstevel@tonic-gate ASSERT(MUTEX_HELD(&service->ipcs_lock));
9200Sstevel@tonic-gate
9210Sstevel@tonic-gate template.ipc_key = key;
9220Sstevel@tonic-gate template.ipc_zoneid = getzoneid();
9230Sstevel@tonic-gate if (perm = avl_find(&service->ipcs_keys, &template, &where)) {
9240Sstevel@tonic-gate ASSERT(!IPC_FREE(perm));
9250Sstevel@tonic-gate if ((flag & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
9260Sstevel@tonic-gate return (EEXIST);
9270Sstevel@tonic-gate if ((flag & 0777) & ~perm->ipc_mode) {
92811861SMarek.Pospisil@Sun.COM if (AU_AUDITING())
9290Sstevel@tonic-gate audit_ipcget(NULL, (void *)perm);
9300Sstevel@tonic-gate return (EACCES);
9310Sstevel@tonic-gate }
9320Sstevel@tonic-gate *permp = perm;
9330Sstevel@tonic-gate return (0);
9340Sstevel@tonic-gate } else if (flag & IPC_CREAT) {
9350Sstevel@tonic-gate *permp = NULL;
9360Sstevel@tonic-gate return (0);
9370Sstevel@tonic-gate }
9380Sstevel@tonic-gate return (ENOENT);
9390Sstevel@tonic-gate }
9400Sstevel@tonic-gate
9410Sstevel@tonic-gate static int
ipc_alloc_test(ipc_service_t * service,proc_t * pp)9420Sstevel@tonic-gate ipc_alloc_test(ipc_service_t *service, proc_t *pp)
9430Sstevel@tonic-gate {
9440Sstevel@tonic-gate ASSERT(MUTEX_HELD(&service->ipcs_lock));
9450Sstevel@tonic-gate
9460Sstevel@tonic-gate /*
9470Sstevel@tonic-gate * Resizing the table first would result in a cleaner code
9480Sstevel@tonic-gate * path, but would also allow a user to (permanently) double
9490Sstevel@tonic-gate * the id table size in cases where the allocation would be
9500Sstevel@tonic-gate * denied. Hence we test the rctl first.
9510Sstevel@tonic-gate */
9520Sstevel@tonic-gate retry:
9530Sstevel@tonic-gate mutex_enter(&pp->p_lock);
9542677Sml93401 if ((rctl_test(service->ipcs_proj_rctl, pp->p_task->tk_proj->kpj_rctls,
9552677Sml93401 pp, 1, RCA_SAFE) & RCT_DENY) ||
9562677Sml93401 (rctl_test(service->ipcs_zone_rctl, pp->p_zone->zone_rctls,
9572677Sml93401 pp, 1, RCA_SAFE) & RCT_DENY)) {
9580Sstevel@tonic-gate mutex_exit(&pp->p_lock);
9590Sstevel@tonic-gate return (ENOSPC);
9600Sstevel@tonic-gate }
9610Sstevel@tonic-gate
9620Sstevel@tonic-gate if (service->ipcs_count == service->ipcs_tabsz) {
9630Sstevel@tonic-gate int error;
9640Sstevel@tonic-gate
9650Sstevel@tonic-gate mutex_exit(&pp->p_lock);
9660Sstevel@tonic-gate if (error = ipc_grow(service))
9670Sstevel@tonic-gate return (error);
9680Sstevel@tonic-gate goto retry;
9690Sstevel@tonic-gate }
9700Sstevel@tonic-gate
9710Sstevel@tonic-gate return (0);
9720Sstevel@tonic-gate }
9730Sstevel@tonic-gate
9740Sstevel@tonic-gate /*
9750Sstevel@tonic-gate * Given a key, search for or create the associated identifier.
9760Sstevel@tonic-gate *
9770Sstevel@tonic-gate * If IPC_CREAT is specified and the key isn't found, or if the key is
9780Sstevel@tonic-gate * equal to IPC_PRIVATE, we return 0 and place a pointer to a newly
9790Sstevel@tonic-gate * allocated object structure in permp. A pointer to the held service
9800Sstevel@tonic-gate * lock is placed in lockp. ipc_mode's IPC_ALLOC bit is clear.
9810Sstevel@tonic-gate *
9820Sstevel@tonic-gate * If the key is found and no error conditions arise, we return 0 and
9830Sstevel@tonic-gate * place a pointer to the existing object structure in permp. A
9840Sstevel@tonic-gate * pointer to the held ID lock is placed in lockp. ipc_mode's
9850Sstevel@tonic-gate * IPC_ALLOC bit is set.
9860Sstevel@tonic-gate *
9870Sstevel@tonic-gate * Otherwise, a non-zero errno value is returned.
9880Sstevel@tonic-gate */
9890Sstevel@tonic-gate int
ipc_get(ipc_service_t * service,key_t key,int flag,kipc_perm_t ** permp,kmutex_t ** lockp)9900Sstevel@tonic-gate ipc_get(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp,
9910Sstevel@tonic-gate kmutex_t **lockp)
9920Sstevel@tonic-gate {
9930Sstevel@tonic-gate kipc_perm_t *perm = NULL;
9940Sstevel@tonic-gate proc_t *pp = curproc;
9950Sstevel@tonic-gate int error, index;
9960Sstevel@tonic-gate cred_t *cr = CRED();
9970Sstevel@tonic-gate
9980Sstevel@tonic-gate if (key != IPC_PRIVATE) {
9990Sstevel@tonic-gate
10000Sstevel@tonic-gate mutex_enter(&service->ipcs_lock);
10010Sstevel@tonic-gate error = ipc_keylookup(service, key, flag, &perm);
10020Sstevel@tonic-gate if (perm != NULL)
10030Sstevel@tonic-gate index = ipc_lock_internal(service, perm->ipc_id);
10040Sstevel@tonic-gate mutex_exit(&service->ipcs_lock);
10050Sstevel@tonic-gate
10060Sstevel@tonic-gate if (error) {
10070Sstevel@tonic-gate ASSERT(perm == NULL);
10080Sstevel@tonic-gate return (error);
10090Sstevel@tonic-gate }
10100Sstevel@tonic-gate
10110Sstevel@tonic-gate if (perm) {
10120Sstevel@tonic-gate ASSERT(!IPC_FREE(perm));
10130Sstevel@tonic-gate *permp = perm;
10140Sstevel@tonic-gate *lockp = &service->ipcs_table[index].ipct_lock;
10150Sstevel@tonic-gate return (0);
10160Sstevel@tonic-gate }
10170Sstevel@tonic-gate
10180Sstevel@tonic-gate /* Key not found; fall through */
10190Sstevel@tonic-gate }
10200Sstevel@tonic-gate
10210Sstevel@tonic-gate perm = kmem_zalloc(service->ipcs_ssize, KM_SLEEP);
10220Sstevel@tonic-gate
10230Sstevel@tonic-gate mutex_enter(&service->ipcs_lock);
10240Sstevel@tonic-gate if (error = ipc_alloc_test(service, pp)) {
10250Sstevel@tonic-gate mutex_exit(&service->ipcs_lock);
10260Sstevel@tonic-gate kmem_free(perm, service->ipcs_ssize);
10270Sstevel@tonic-gate return (error);
10280Sstevel@tonic-gate }
10290Sstevel@tonic-gate
10300Sstevel@tonic-gate perm->ipc_cuid = perm->ipc_uid = crgetuid(cr);
10310Sstevel@tonic-gate perm->ipc_cgid = perm->ipc_gid = crgetgid(cr);
10320Sstevel@tonic-gate perm->ipc_zoneid = getzoneid();
10330Sstevel@tonic-gate perm->ipc_mode = flag & 0777;
10340Sstevel@tonic-gate perm->ipc_key = key;
10350Sstevel@tonic-gate perm->ipc_ref = 1;
10360Sstevel@tonic-gate perm->ipc_id = IPC_ID_INVAL;
10370Sstevel@tonic-gate *permp = perm;
10380Sstevel@tonic-gate *lockp = &service->ipcs_lock;
10390Sstevel@tonic-gate
10400Sstevel@tonic-gate return (0);
10410Sstevel@tonic-gate }
10420Sstevel@tonic-gate
10430Sstevel@tonic-gate /*
10440Sstevel@tonic-gate * Attempts to add the a newly created ID to the global namespace. If
10450Sstevel@tonic-gate * creating it would cause an error, we return the error. If there is
10460Sstevel@tonic-gate * the possibility that we could obtain the existing ID and return it
10470Sstevel@tonic-gate * to the user, we return EAGAIN. Otherwise, we return 0 with p_lock
10480Sstevel@tonic-gate * and the service lock held.
10490Sstevel@tonic-gate *
10500Sstevel@tonic-gate * Since this should be only called after all initialization has been
10510Sstevel@tonic-gate * completed, on failure we automatically invoke the destructor for the
10520Sstevel@tonic-gate * object and deallocate the memory associated with it.
10530Sstevel@tonic-gate */
10540Sstevel@tonic-gate int
ipc_commit_begin(ipc_service_t * service,key_t key,int flag,kipc_perm_t * newperm)10550Sstevel@tonic-gate ipc_commit_begin(ipc_service_t *service, key_t key, int flag,
10560Sstevel@tonic-gate kipc_perm_t *newperm)
10570Sstevel@tonic-gate {
10580Sstevel@tonic-gate kipc_perm_t *perm;
10590Sstevel@tonic-gate int error;
10600Sstevel@tonic-gate proc_t *pp = curproc;
10610Sstevel@tonic-gate
10620Sstevel@tonic-gate ASSERT(newperm->ipc_ref == 1);
10630Sstevel@tonic-gate ASSERT(IPC_FREE(newperm));
10640Sstevel@tonic-gate
10653458Ssl108498 /*
1066*13096SJordan.Vaughan@Sun.com * Set ipc_proj and ipc_zone_ref so that future calls to ipc_cleanup()
10673458Ssl108498 * clean up the necessary state. This must be done before the
10683458Ssl108498 * potential call to ipcs_dtor() below.
10693458Ssl108498 */
10703458Ssl108498 newperm->ipc_proj = pp->p_task->tk_proj;
1071*13096SJordan.Vaughan@Sun.com zone_init_ref(&newperm->ipc_zone_ref);
1072*13096SJordan.Vaughan@Sun.com zone_hold_ref(pp->p_zone, &newperm->ipc_zone_ref, ZONE_REF_IPC);
10733458Ssl108498
10740Sstevel@tonic-gate mutex_enter(&service->ipcs_lock);
10750Sstevel@tonic-gate /*
10760Sstevel@tonic-gate * Ensure that no-one has raced with us and created the key.
10770Sstevel@tonic-gate */
10780Sstevel@tonic-gate if ((key != IPC_PRIVATE) &&
10790Sstevel@tonic-gate (((error = ipc_keylookup(service, key, flag, &perm)) != 0) ||
10800Sstevel@tonic-gate (perm != NULL))) {
10810Sstevel@tonic-gate error = error ? error : EAGAIN;
10820Sstevel@tonic-gate goto errout;
10830Sstevel@tonic-gate }
10840Sstevel@tonic-gate
10850Sstevel@tonic-gate /*
10860Sstevel@tonic-gate * Ensure that no-one has raced with us and used the last of
10870Sstevel@tonic-gate * the permissible ids, or the last of the free spaces in the
10880Sstevel@tonic-gate * id table.
10890Sstevel@tonic-gate */
10900Sstevel@tonic-gate if (error = ipc_alloc_test(service, pp))
10910Sstevel@tonic-gate goto errout;
10920Sstevel@tonic-gate
10930Sstevel@tonic-gate ASSERT(MUTEX_HELD(&service->ipcs_lock));
10940Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pp->p_lock));
10950Sstevel@tonic-gate
10960Sstevel@tonic-gate return (0);
10970Sstevel@tonic-gate errout:
10980Sstevel@tonic-gate mutex_exit(&service->ipcs_lock);
10990Sstevel@tonic-gate service->ipcs_dtor(newperm);
1100*13096SJordan.Vaughan@Sun.com zone_rele_ref(&newperm->ipc_zone_ref, ZONE_REF_IPC);
11010Sstevel@tonic-gate kmem_free(newperm, service->ipcs_ssize);
11020Sstevel@tonic-gate return (error);
11030Sstevel@tonic-gate }
11040Sstevel@tonic-gate
11050Sstevel@tonic-gate /*
11060Sstevel@tonic-gate * Commit the ID allocation transaction. Called with p_lock and the
11070Sstevel@tonic-gate * service lock held, both of which are dropped. Returns the held ID
11080Sstevel@tonic-gate * lock so the caller can extract the ID and perform ipcget auditing.
11090Sstevel@tonic-gate */
11100Sstevel@tonic-gate kmutex_t *
ipc_commit_end(ipc_service_t * service,kipc_perm_t * perm)11110Sstevel@tonic-gate ipc_commit_end(ipc_service_t *service, kipc_perm_t *perm)
11120Sstevel@tonic-gate {
11130Sstevel@tonic-gate ipc_slot_t *slot;
11140Sstevel@tonic-gate avl_index_t where;
11150Sstevel@tonic-gate int index;
11160Sstevel@tonic-gate void *loc;
11170Sstevel@tonic-gate
11180Sstevel@tonic-gate ASSERT(MUTEX_HELD(&service->ipcs_lock));
11190Sstevel@tonic-gate ASSERT(MUTEX_HELD(&curproc->p_lock));
11200Sstevel@tonic-gate
11210Sstevel@tonic-gate (void) project_hold(perm->ipc_proj);
11220Sstevel@tonic-gate mutex_exit(&curproc->p_lock);
11230Sstevel@tonic-gate
11240Sstevel@tonic-gate /*
11250Sstevel@tonic-gate * Pick out our slot.
11260Sstevel@tonic-gate */
11270Sstevel@tonic-gate service->ipcs_count++;
11280Sstevel@tonic-gate index = id_alloc(service->ipcs_ids);
11290Sstevel@tonic-gate ASSERT(index < service->ipcs_tabsz);
11300Sstevel@tonic-gate slot = &service->ipcs_table[index];
11310Sstevel@tonic-gate mutex_enter(&slot->ipct_lock);
11320Sstevel@tonic-gate ASSERT(slot->ipct_data == NULL);
11330Sstevel@tonic-gate
11340Sstevel@tonic-gate /*
11350Sstevel@tonic-gate * Update the perm structure.
11360Sstevel@tonic-gate */
11370Sstevel@tonic-gate perm->ipc_mode |= IPC_ALLOC;
11380Sstevel@tonic-gate perm->ipc_id = (slot->ipct_seq << IPC_SEQ_SHIFT) | index;
11390Sstevel@tonic-gate
11400Sstevel@tonic-gate /*
11410Sstevel@tonic-gate * Push into global visibility.
11420Sstevel@tonic-gate */
11430Sstevel@tonic-gate slot->ipct_data = perm;
11440Sstevel@tonic-gate if (perm->ipc_key != IPC_PRIVATE) {
11450Sstevel@tonic-gate loc = avl_find(&service->ipcs_keys, perm, &where);
11460Sstevel@tonic-gate ASSERT(loc == NULL);
11470Sstevel@tonic-gate avl_insert(&service->ipcs_keys, perm, where);
11480Sstevel@tonic-gate }
11490Sstevel@tonic-gate list_insert_head(&service->ipcs_usedids, perm);
11500Sstevel@tonic-gate
11510Sstevel@tonic-gate /*
11520Sstevel@tonic-gate * Update resource consumption.
11530Sstevel@tonic-gate */
11542677Sml93401 IPC_PROJ_USAGE(perm, service) += 1;
11552677Sml93401 IPC_ZONE_USAGE(perm, service) += 1;
11560Sstevel@tonic-gate
11570Sstevel@tonic-gate mutex_exit(&service->ipcs_lock);
11580Sstevel@tonic-gate return (&slot->ipct_lock);
11590Sstevel@tonic-gate }
11600Sstevel@tonic-gate
11610Sstevel@tonic-gate /*
11620Sstevel@tonic-gate * Clean up function, in case the allocation fails. If called between
11630Sstevel@tonic-gate * ipc_lookup and ipc_commit_begin, perm->ipc_proj will be 0 and we
11640Sstevel@tonic-gate * merely free the perm structure. If called after ipc_commit_begin,
11650Sstevel@tonic-gate * we also drop locks and call the ID's destructor.
11660Sstevel@tonic-gate */
11670Sstevel@tonic-gate void
ipc_cleanup(ipc_service_t * service,kipc_perm_t * perm)11680Sstevel@tonic-gate ipc_cleanup(ipc_service_t *service, kipc_perm_t *perm)
11690Sstevel@tonic-gate {
11700Sstevel@tonic-gate ASSERT(IPC_FREE(perm));
11710Sstevel@tonic-gate if (perm->ipc_proj) {
11720Sstevel@tonic-gate mutex_exit(&curproc->p_lock);
11730Sstevel@tonic-gate mutex_exit(&service->ipcs_lock);
11740Sstevel@tonic-gate service->ipcs_dtor(perm);
11750Sstevel@tonic-gate }
1176*13096SJordan.Vaughan@Sun.com if (perm->ipc_zone_ref.zref_zone != NULL)
1177*13096SJordan.Vaughan@Sun.com zone_rele_ref(&perm->ipc_zone_ref, ZONE_REF_IPC);
11780Sstevel@tonic-gate kmem_free(perm, service->ipcs_ssize);
11790Sstevel@tonic-gate }
11800Sstevel@tonic-gate
11810Sstevel@tonic-gate
11820Sstevel@tonic-gate /*
11830Sstevel@tonic-gate * Common code to remove an IPC object. This should be called after
11840Sstevel@tonic-gate * all permissions checks have been performed, and with the service
11850Sstevel@tonic-gate * and ID locked. Note that this does not remove the object from
11860Sstevel@tonic-gate * the ipcs_usedids list (this needs to be done by the caller before
11870Sstevel@tonic-gate * dropping the service lock).
11880Sstevel@tonic-gate */
11890Sstevel@tonic-gate static void
ipc_remove(ipc_service_t * service,kipc_perm_t * perm)11900Sstevel@tonic-gate ipc_remove(ipc_service_t *service, kipc_perm_t *perm)
11910Sstevel@tonic-gate {
11920Sstevel@tonic-gate int id = perm->ipc_id;
11930Sstevel@tonic-gate int index;
11940Sstevel@tonic-gate
11950Sstevel@tonic-gate ASSERT(MUTEX_HELD(&service->ipcs_lock));
11960Sstevel@tonic-gate ASSERT(IPC_LOCKED(service, perm));
11970Sstevel@tonic-gate
11980Sstevel@tonic-gate index = IPC_INDEX(id);
11990Sstevel@tonic-gate
12000Sstevel@tonic-gate service->ipcs_table[index].ipct_data = NULL;
12010Sstevel@tonic-gate
12020Sstevel@tonic-gate if (perm->ipc_key != IPC_PRIVATE)
12030Sstevel@tonic-gate avl_remove(&service->ipcs_keys, perm);
12040Sstevel@tonic-gate list_remove(&service->ipcs_usedids, perm);
12050Sstevel@tonic-gate perm->ipc_mode &= ~IPC_ALLOC;
12060Sstevel@tonic-gate
12070Sstevel@tonic-gate id_free(service->ipcs_ids, index);
12080Sstevel@tonic-gate
12090Sstevel@tonic-gate if (service->ipcs_table[index].ipct_seq++ == IPC_SEQ_MASK)
12100Sstevel@tonic-gate service->ipcs_table[index].ipct_seq = 0;
12110Sstevel@tonic-gate service->ipcs_count--;
12122677Sml93401 ASSERT(IPC_PROJ_USAGE(perm, service) > 0);
12132677Sml93401 ASSERT(IPC_ZONE_USAGE(perm, service) > 0);
12142677Sml93401 IPC_PROJ_USAGE(perm, service) -= 1;
12152677Sml93401 IPC_ZONE_USAGE(perm, service) -= 1;
12162677Sml93401 ASSERT(service->ipcs_count || ((IPC_PROJ_USAGE(perm, service) == 0) &&
12172677Sml93401 (IPC_ZONE_USAGE(perm, service) == 0)));
12180Sstevel@tonic-gate }
12190Sstevel@tonic-gate
12200Sstevel@tonic-gate
12210Sstevel@tonic-gate /*
12220Sstevel@tonic-gate * Common code to perform an IPC_RMID. Returns an errno value on
12230Sstevel@tonic-gate * failure, 0 on success.
12240Sstevel@tonic-gate */
12250Sstevel@tonic-gate int
ipc_rmid(ipc_service_t * service,int id,cred_t * cr)12260Sstevel@tonic-gate ipc_rmid(ipc_service_t *service, int id, cred_t *cr)
12270Sstevel@tonic-gate {
12280Sstevel@tonic-gate kipc_perm_t *perm;
12290Sstevel@tonic-gate kmutex_t *lock;
12300Sstevel@tonic-gate
12310Sstevel@tonic-gate mutex_enter(&service->ipcs_lock);
12320Sstevel@tonic-gate
12330Sstevel@tonic-gate lock = ipc_lookup(service, id, &perm);
12340Sstevel@tonic-gate if (lock == NULL) {
12350Sstevel@tonic-gate mutex_exit(&service->ipcs_lock);
12360Sstevel@tonic-gate return (EINVAL);
12370Sstevel@tonic-gate }
12380Sstevel@tonic-gate
12390Sstevel@tonic-gate ASSERT(service->ipcs_count > 0);
12400Sstevel@tonic-gate
12410Sstevel@tonic-gate if (secpolicy_ipc_owner(cr, perm) != 0) {
12420Sstevel@tonic-gate mutex_exit(lock);
12430Sstevel@tonic-gate mutex_exit(&service->ipcs_lock);
12440Sstevel@tonic-gate return (EPERM);
12450Sstevel@tonic-gate }
12460Sstevel@tonic-gate
12470Sstevel@tonic-gate /*
12480Sstevel@tonic-gate * Nothing can fail from this point on.
12490Sstevel@tonic-gate */
12500Sstevel@tonic-gate ipc_remove(service, perm);
12510Sstevel@tonic-gate mutex_exit(&service->ipcs_lock);
12520Sstevel@tonic-gate
12530Sstevel@tonic-gate /* perform any per-service removal actions */
12540Sstevel@tonic-gate service->ipcs_rmid(perm);
12550Sstevel@tonic-gate
12560Sstevel@tonic-gate ipc_rele(service, perm);
12570Sstevel@tonic-gate
12580Sstevel@tonic-gate return (0);
12590Sstevel@tonic-gate }
12600Sstevel@tonic-gate
12610Sstevel@tonic-gate /*
12620Sstevel@tonic-gate * Implementation for shmids, semids, and msgids. buf is the address
12630Sstevel@tonic-gate * of the user buffer, nids is the size, and pnids is a pointer to
12640Sstevel@tonic-gate * where we write the actual number of ids that [would] have been
12650Sstevel@tonic-gate * copied out.
12660Sstevel@tonic-gate */
12670Sstevel@tonic-gate int
ipc_ids(ipc_service_t * service,int * buf,uint_t nids,uint_t * pnids)12680Sstevel@tonic-gate ipc_ids(ipc_service_t *service, int *buf, uint_t nids, uint_t *pnids)
12690Sstevel@tonic-gate {
12700Sstevel@tonic-gate kipc_perm_t *perm;
12710Sstevel@tonic-gate size_t idsize = 0;
12720Sstevel@tonic-gate int error = 0;
12730Sstevel@tonic-gate int idcount;
12740Sstevel@tonic-gate int *ids;
12750Sstevel@tonic-gate int numids = 0;
12760Sstevel@tonic-gate zoneid_t zoneid = getzoneid();
12770Sstevel@tonic-gate int global = INGLOBALZONE(curproc);
12780Sstevel@tonic-gate
12790Sstevel@tonic-gate if (buf == NULL)
12800Sstevel@tonic-gate nids = 0;
12810Sstevel@tonic-gate
12820Sstevel@tonic-gate /*
12830Sstevel@tonic-gate * Get an accurate count of the total number of ids, and allocate a
12840Sstevel@tonic-gate * staging buffer. Since ipcs_count is always sane, we don't have
12850Sstevel@tonic-gate * to take ipcs_lock for our first guess. If there are no ids, or
12860Sstevel@tonic-gate * we're in the global zone and the number of ids is greater than
12870Sstevel@tonic-gate * the size of the specified buffer, we shunt to the end. Otherwise,
12880Sstevel@tonic-gate * we go through the id list looking for (and counting) what is
12890Sstevel@tonic-gate * visible in the specified zone.
12900Sstevel@tonic-gate */
12910Sstevel@tonic-gate idcount = service->ipcs_count;
12920Sstevel@tonic-gate for (;;) {
12930Sstevel@tonic-gate if ((global && idcount > nids) || idcount == 0) {
12940Sstevel@tonic-gate numids = idcount;
12950Sstevel@tonic-gate nids = 0;
12960Sstevel@tonic-gate goto out;
12970Sstevel@tonic-gate }
12980Sstevel@tonic-gate
12990Sstevel@tonic-gate idsize = idcount * sizeof (int);
13000Sstevel@tonic-gate ids = kmem_alloc(idsize, KM_SLEEP);
13010Sstevel@tonic-gate
13020Sstevel@tonic-gate mutex_enter(&service->ipcs_lock);
13030Sstevel@tonic-gate if (idcount >= service->ipcs_count)
13040Sstevel@tonic-gate break;
13050Sstevel@tonic-gate idcount = service->ipcs_count;
13060Sstevel@tonic-gate mutex_exit(&service->ipcs_lock);
13070Sstevel@tonic-gate
13080Sstevel@tonic-gate if (idsize != 0) {
13090Sstevel@tonic-gate kmem_free(ids, idsize);
13100Sstevel@tonic-gate idsize = 0;
13110Sstevel@tonic-gate }
13120Sstevel@tonic-gate }
13130Sstevel@tonic-gate
13140Sstevel@tonic-gate for (perm = list_head(&service->ipcs_usedids); perm != NULL;
13150Sstevel@tonic-gate perm = list_next(&service->ipcs_usedids, perm)) {
13160Sstevel@tonic-gate ASSERT(!IPC_FREE(perm));
13170Sstevel@tonic-gate if (global || perm->ipc_zoneid == zoneid)
13180Sstevel@tonic-gate ids[numids++] = perm->ipc_id;
13190Sstevel@tonic-gate }
13200Sstevel@tonic-gate mutex_exit(&service->ipcs_lock);
13210Sstevel@tonic-gate
13220Sstevel@tonic-gate /*
13230Sstevel@tonic-gate * If there isn't enough space to hold all of the ids, just
13240Sstevel@tonic-gate * return the number of ids without copying out any of them.
13250Sstevel@tonic-gate */
13260Sstevel@tonic-gate if (nids < numids)
13270Sstevel@tonic-gate nids = 0;
13280Sstevel@tonic-gate
13290Sstevel@tonic-gate out:
13300Sstevel@tonic-gate if (suword32(pnids, (uint32_t)numids) ||
13310Sstevel@tonic-gate (nids != 0 && copyout(ids, buf, numids * sizeof (int))))
13320Sstevel@tonic-gate error = EFAULT;
13330Sstevel@tonic-gate if (idsize != 0)
13340Sstevel@tonic-gate kmem_free(ids, idsize);
13350Sstevel@tonic-gate return (error);
13360Sstevel@tonic-gate }
13370Sstevel@tonic-gate
13380Sstevel@tonic-gate /*
13390Sstevel@tonic-gate * Destroy IPC objects from the given service that are associated with
13400Sstevel@tonic-gate * the given zone.
13410Sstevel@tonic-gate *
13420Sstevel@tonic-gate * We can't hold on to the service lock when freeing objects, so we
13430Sstevel@tonic-gate * first search the service and move all the objects to a private
13440Sstevel@tonic-gate * list, then walk through and free them after dropping the lock.
13450Sstevel@tonic-gate */
13460Sstevel@tonic-gate void
ipc_remove_zone(ipc_service_t * service,zoneid_t zoneid)13470Sstevel@tonic-gate ipc_remove_zone(ipc_service_t *service, zoneid_t zoneid)
13480Sstevel@tonic-gate {
13490Sstevel@tonic-gate kipc_perm_t *perm, *next;
13500Sstevel@tonic-gate list_t rmlist;
13510Sstevel@tonic-gate kmutex_t *lock;
13520Sstevel@tonic-gate
13530Sstevel@tonic-gate list_create(&rmlist, sizeof (kipc_perm_t),
13540Sstevel@tonic-gate offsetof(kipc_perm_t, ipc_list));
13550Sstevel@tonic-gate
13560Sstevel@tonic-gate mutex_enter(&service->ipcs_lock);
13570Sstevel@tonic-gate for (perm = list_head(&service->ipcs_usedids); perm != NULL;
13580Sstevel@tonic-gate perm = next) {
13590Sstevel@tonic-gate next = list_next(&service->ipcs_usedids, perm);
13600Sstevel@tonic-gate if (perm->ipc_zoneid != zoneid)
13610Sstevel@tonic-gate continue;
13620Sstevel@tonic-gate
13630Sstevel@tonic-gate /*
13640Sstevel@tonic-gate * Remove the object from the service, then put it on
13650Sstevel@tonic-gate * the removal list so we can defer the call to
13660Sstevel@tonic-gate * ipc_rele (which will actually free the structure).
13670Sstevel@tonic-gate * We need to do this since the destructor may grab
13680Sstevel@tonic-gate * the service lock.
13690Sstevel@tonic-gate */
13700Sstevel@tonic-gate ASSERT(!IPC_FREE(perm));
13710Sstevel@tonic-gate lock = ipc_lock(service, perm->ipc_id);
13720Sstevel@tonic-gate ipc_remove(service, perm);
13730Sstevel@tonic-gate mutex_exit(lock);
13740Sstevel@tonic-gate list_insert_tail(&rmlist, perm);
13750Sstevel@tonic-gate }
13760Sstevel@tonic-gate mutex_exit(&service->ipcs_lock);
13770Sstevel@tonic-gate
13780Sstevel@tonic-gate /*
13790Sstevel@tonic-gate * Now that we've dropped the service lock, loop through the
13800Sstevel@tonic-gate * private list freeing removed objects.
13810Sstevel@tonic-gate */
13820Sstevel@tonic-gate for (perm = list_head(&rmlist); perm != NULL; perm = next) {
13830Sstevel@tonic-gate next = list_next(&rmlist, perm);
13840Sstevel@tonic-gate list_remove(&rmlist, perm);
13850Sstevel@tonic-gate
13860Sstevel@tonic-gate (void) ipc_lock(service, perm->ipc_id);
13870Sstevel@tonic-gate
13880Sstevel@tonic-gate /* perform any per-service removal actions */
13890Sstevel@tonic-gate service->ipcs_rmid(perm);
13900Sstevel@tonic-gate
13910Sstevel@tonic-gate /* release reference */
13920Sstevel@tonic-gate ipc_rele(service, perm);
13930Sstevel@tonic-gate }
13940Sstevel@tonic-gate
13950Sstevel@tonic-gate list_destroy(&rmlist);
13960Sstevel@tonic-gate }
1397