10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 52565Sudpa * Common Development and Distribution License (the "License"). 62565Sudpa * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*11861SMarek.Pospisil@Sun.COM * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 270Sstevel@tonic-gate /* All Rights Reserved */ 280Sstevel@tonic-gate 290Sstevel@tonic-gate 300Sstevel@tonic-gate /* 310Sstevel@tonic-gate * Inter-Process Communication Message Facility. 320Sstevel@tonic-gate * 330Sstevel@tonic-gate * See os/ipc.c for a description of common IPC functionality. 
340Sstevel@tonic-gate * 350Sstevel@tonic-gate * Resource controls 360Sstevel@tonic-gate * ----------------- 370Sstevel@tonic-gate * 382677Sml93401 * Control: zone.max-msg-ids (rc_zone_msgmni) 392677Sml93401 * Description: Maximum number of message queue ids allowed a zone. 402677Sml93401 * 412677Sml93401 * When msgget() is used to allocate a message queue, one id is 422677Sml93401 * allocated. If the id allocation doesn't succeed, msgget() fails 432677Sml93401 * and errno is set to ENOSPC. Upon successful msgctl(, IPC_RMID) 442677Sml93401 * the id is deallocated. 452677Sml93401 * 460Sstevel@tonic-gate * Control: project.max-msg-ids (rc_project_msgmni) 470Sstevel@tonic-gate * Description: Maximum number of message queue ids allowed a project. 480Sstevel@tonic-gate * 490Sstevel@tonic-gate * When msgget() is used to allocate a message queue, one id is 500Sstevel@tonic-gate * allocated. If the id allocation doesn't succeed, msgget() fails 510Sstevel@tonic-gate * and errno is set to ENOSPC. Upon successful msgctl(, IPC_RMID) 520Sstevel@tonic-gate * the id is deallocated. 530Sstevel@tonic-gate * 540Sstevel@tonic-gate * Control: process.max-msg-qbytes (rc_process_msgmnb) 550Sstevel@tonic-gate * Description: Maximum number of bytes of messages on a message queue. 560Sstevel@tonic-gate * 570Sstevel@tonic-gate * When msgget() successfully allocates a message queue, the minimum 580Sstevel@tonic-gate * enforced value of this limit is used to initialize msg_qbytes. 590Sstevel@tonic-gate * 600Sstevel@tonic-gate * Control: process.max-msg-messages (rc_process_msgtql) 610Sstevel@tonic-gate * Description: Maximum number of messages on a message queue. 620Sstevel@tonic-gate * 630Sstevel@tonic-gate * When msgget() successfully allocates a message queue, the minimum 640Sstevel@tonic-gate * enforced value of this limit is used to initialize a per-queue 650Sstevel@tonic-gate * limit on the number of messages. 
660Sstevel@tonic-gate */ 670Sstevel@tonic-gate 680Sstevel@tonic-gate #include <sys/types.h> 690Sstevel@tonic-gate #include <sys/t_lock.h> 700Sstevel@tonic-gate #include <sys/param.h> 710Sstevel@tonic-gate #include <sys/cred.h> 720Sstevel@tonic-gate #include <sys/user.h> 730Sstevel@tonic-gate #include <sys/proc.h> 740Sstevel@tonic-gate #include <sys/time.h> 750Sstevel@tonic-gate #include <sys/ipc.h> 760Sstevel@tonic-gate #include <sys/ipc_impl.h> 770Sstevel@tonic-gate #include <sys/msg.h> 780Sstevel@tonic-gate #include <sys/msg_impl.h> 790Sstevel@tonic-gate #include <sys/list.h> 800Sstevel@tonic-gate #include <sys/systm.h> 810Sstevel@tonic-gate #include <sys/sysmacros.h> 820Sstevel@tonic-gate #include <sys/cpuvar.h> 830Sstevel@tonic-gate #include <sys/kmem.h> 840Sstevel@tonic-gate #include <sys/ddi.h> 850Sstevel@tonic-gate #include <sys/errno.h> 860Sstevel@tonic-gate #include <sys/cmn_err.h> 870Sstevel@tonic-gate #include <sys/debug.h> 880Sstevel@tonic-gate #include <sys/project.h> 890Sstevel@tonic-gate #include <sys/modctl.h> 900Sstevel@tonic-gate #include <sys/syscall.h> 910Sstevel@tonic-gate #include <sys/policy.h> 920Sstevel@tonic-gate #include <sys/zone.h> 930Sstevel@tonic-gate 940Sstevel@tonic-gate #include <c2/audit.h> 950Sstevel@tonic-gate 960Sstevel@tonic-gate /* 970Sstevel@tonic-gate * The following tunables are obsolete. Though for compatibility we 980Sstevel@tonic-gate * still read and interpret msginfo_msgmnb, msginfo_msgmni, and 990Sstevel@tonic-gate * msginfo_msgtql (see os/project.c and os/rctl_proc.c), the preferred 1000Sstevel@tonic-gate * mechanism for administrating the IPC Message facility is through the 1010Sstevel@tonic-gate * resource controls described at the top of this file. 
1020Sstevel@tonic-gate */ 1030Sstevel@tonic-gate size_t msginfo_msgmax = 2048; /* (obsolete) */ 1040Sstevel@tonic-gate size_t msginfo_msgmnb = 4096; /* (obsolete) */ 1050Sstevel@tonic-gate int msginfo_msgmni = 50; /* (obsolete) */ 1060Sstevel@tonic-gate int msginfo_msgtql = 40; /* (obsolete) */ 1070Sstevel@tonic-gate int msginfo_msgssz = 8; /* (obsolete) */ 1080Sstevel@tonic-gate int msginfo_msgmap = 0; /* (obsolete) */ 1090Sstevel@tonic-gate ushort_t msginfo_msgseg = 1024; /* (obsolete) */ 1100Sstevel@tonic-gate 1112677Sml93401 extern rctl_hndl_t rc_zone_msgmni; 1120Sstevel@tonic-gate extern rctl_hndl_t rc_project_msgmni; 1130Sstevel@tonic-gate extern rctl_hndl_t rc_process_msgmnb; 1140Sstevel@tonic-gate extern rctl_hndl_t rc_process_msgtql; 1150Sstevel@tonic-gate static ipc_service_t *msq_svc; 1160Sstevel@tonic-gate static zone_key_t msg_zone_key; 1170Sstevel@tonic-gate 1180Sstevel@tonic-gate static void msg_dtor(kipc_perm_t *); 1190Sstevel@tonic-gate static void msg_rmid(kipc_perm_t *); 1200Sstevel@tonic-gate static void msg_remove_zone(zoneid_t, void *); 1210Sstevel@tonic-gate 1220Sstevel@tonic-gate /* 1230Sstevel@tonic-gate * Module linkage information for the kernel. 
1240Sstevel@tonic-gate */ 1250Sstevel@tonic-gate static ssize_t msgsys(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2, 1260Sstevel@tonic-gate uintptr_t a4, uintptr_t a5); 1270Sstevel@tonic-gate 1280Sstevel@tonic-gate static struct sysent ipcmsg_sysent = { 1290Sstevel@tonic-gate 6, 1300Sstevel@tonic-gate #ifdef _LP64 1310Sstevel@tonic-gate SE_ARGC | SE_NOUNLOAD | SE_64RVAL, 1320Sstevel@tonic-gate #else 1330Sstevel@tonic-gate SE_ARGC | SE_NOUNLOAD | SE_32RVAL1, 1340Sstevel@tonic-gate #endif 1350Sstevel@tonic-gate (int (*)())msgsys 1360Sstevel@tonic-gate }; 1370Sstevel@tonic-gate 1380Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 1390Sstevel@tonic-gate static ssize32_t msgsys32(int opcode, uint32_t a0, uint32_t a1, uint32_t a2, 1400Sstevel@tonic-gate uint32_t a4, uint32_t a5); 1410Sstevel@tonic-gate 1420Sstevel@tonic-gate static struct sysent ipcmsg_sysent32 = { 1430Sstevel@tonic-gate 6, 1440Sstevel@tonic-gate SE_ARGC | SE_NOUNLOAD | SE_32RVAL1, 1450Sstevel@tonic-gate (int (*)())msgsys32 1460Sstevel@tonic-gate }; 1470Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 1480Sstevel@tonic-gate 1490Sstevel@tonic-gate static struct modlsys modlsys = { 1500Sstevel@tonic-gate &mod_syscallops, "System V message facility", &ipcmsg_sysent 1510Sstevel@tonic-gate }; 1520Sstevel@tonic-gate 1530Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 1540Sstevel@tonic-gate static struct modlsys modlsys32 = { 1550Sstevel@tonic-gate &mod_syscallops32, "32-bit System V message facility", &ipcmsg_sysent32 1560Sstevel@tonic-gate }; 1570Sstevel@tonic-gate #endif 1580Sstevel@tonic-gate 1594153Sdv142724 /* 1604153Sdv142724 * Big Theory statement for message queue correctness 1614153Sdv142724 * 1624153Sdv142724 * The msgrcv and msgsnd functions no longer uses cv_broadcast to wake up 1634153Sdv142724 * receivers who are waiting for an event. 
 * Using the cv_broadcast method
 * resulted in negative scaling when the number of waiting receivers is large
 * (the thundering herd problem).  Instead, the receivers waiting to receive a
 * message are now linked in a queue-like fashion and awakened one at a time
 * in a controlled manner.
 *
 * Receivers can block on two different classes of waiting list:
 *    1) "sendwait" list, which is the more complex list of the two.  The
 *	  receiver will be awakened by a sender posting a new message.  There
 *	  are two types of "sendwait" list used:
 *		a) msg_wait_snd: handles all receivers who are looking for
 *		   a message type >= 0, but were unable to locate a match.
 *
 *		   slot 0: reserved for receivers that have designated they
 *			   will take any message type.
 *		   rest:   consists of receivers requesting a specific type
 *			   but the type was not present.  The entries are
 *			   hashed into a bucket in an attempt to keep
 *			   any list search relatively short.
 *		b) msg_wait_snd_ngt: handles all receivers that have designated
 *		   a negative message type. Unlike msg_wait_snd, the hash bucket
 *		   serves a range of negative message types (-1 to -5, -6 to -10
 *		   and so forth), where the last bucket is reserved for all the
 *		   negative message types that hash outside of MSG_MAX_QNUM - 1.
 *		   This is done this way to simplify the operation of locating a
 *		   negative message type.
 *
 *    2) "copyout" list, where the receiver is awakened by another
 *	  receiver after a message is copied out.
 *	  This is a linked list
 *	  of waiters that are awakened one at a time.  Although the solution is
 *	  not optimal, the complexity that would be added in for waking
 *	  up the right entry far exceeds any potential pay back (too many
 *	  correctness and corner case issues).
 *
 * The lists are doubly linked.  In the case of the "sendwait"
 * list, this allows the thread to remove itself from the list without having
 * to traverse the list.  In the case of the "copyout" list it simply allows
 * us to use common functions with the "sendwait" list.
 *
 * To make sure receivers are not hung out to dry, we must guarantee:
 *    1. If any queued message matches any receiver, then at least one
 *       matching receiver must be processing the request.
 *    2. Blocking on the copyout queue is only temporary while messages
 *	  are being copied out.  The process is guaranteed to wake up
 *	  when it gets to the front of the queue (copyout is a FIFO).
 *
 * Rules for blocking and waking up:
 *   1. A receiver entering msgrcv must examine all messages for a match
 *      before blocking on a sendwait queue.
 *   2. If the receiver blocks because the message it chose is already
 *	 being copied out, then when it wakes up it needs to start
 *	 checking the messages from the beginning.
 *   3) Whenever a process returns from msgrcv for any reason, if it
 *	 had attempted to copy a message or blocked waiting for a copy
 *	 to complete it needs to wake up the next receiver blocked on
 *	 a copy out.
 *   4) When a message is sent, the sender selects a process waiting
 *	 for that type of message.  This selection process rotates between
 *	 receiver types of 0, negative and positive to prevent starvation of
 *	 any one particular receiver type.
 *   5) The following are the scenarios for processes that are awakened
 *	 by a msgsnd:
 *		a) The process finds the message and is able to copy
 *		   it out.  Once complete, the process returns.
 *		b) The message that was sent that triggered the wakeup is no
 *		   longer available (another process found the message first).
 *		   We issue a wakeup on copy queue and then go back to
 *		   sleep waiting for another matching message to be sent.
 *		c) The message that was supposed to be processed was
 *		   already serviced by another process.  However a different
 *		   message is present which we can service.  The message
 *		   is copied and the process returns.
 *		d) The message is found, but some sort of error occurs that
 *		   prevents the message from being copied.  The receiver
 *		   wakes up the next sender that can service this message
 *		   type and returns an error to the caller.
 *		e) The message is found, but it is marked as being copied
 *		   out.  The receiver then goes to sleep on the copyout
 *		   queue where it will be awakened again sometime in the future.
 *
 *
 *   6) Whenever a message is found that matches the message type designated,
 *	 but is being copied out we have to block on the copyout queue.
 *	 After the process doing the copy finishes the copy out, it must wake
 *	 up (either directly or indirectly) all receivers who blocked on its
 *	 copyout, so they are guaranteed a chance to examine the remaining
 *	 messages.
 *	 This is implemented via a chain of wakeups: Y wakes X, who wakes Z,
 *	 and so on.  The chain cannot be broken.  This leads to the following
 *	 cases:
 *		a) A receiver is finished copying the message (or encountered
 *		   an error), the first entry on the copyout queue is woken
 *		   up.
 *		b) When the receiver is woken up, it attempts to locate
 *		   a message type match.
 *		c) If a message type is found and
 *			-- MSG_RCVCOPY flag is not set, the message is
 *			   marked for copying out.  Regardless of the copyout
 *			   success the next entry on the copyout queue is
 *			   awakened and the operation is completed.
 *			-- MSG_RCVCOPY is set, we simply go back to sleep again
 *			   on the copyout queue.
 *		d) If the message type is not found then we wake up the next
 *		   process on the copyout queue.
 *   7) If a msgsnd is unable to complete for any of the following reasons
 *	  a) the msgq has no space for the message
 *	  b) the maximum number of messages allowed has been reached
 *      then one of two things happens:
 *	  1) If the passed in msg_flag has IPC_NOWAIT set, then
 *	     an error is returned.
 *	  2) The IPC_NOWAIT bit is not set in msg_flag, then
 *	     the thread is placed to sleep until the request can be
 *	     serviced.
2758834SDavid.Valin@Sun.COM * 8) When waking a thread waiting to send a message, a check is done to 2768834SDavid.Valin@Sun.COM * verify that the operation being asked for by the thread will complete. 2778834SDavid.Valin@Sun.COM * This decision making process is done in a loop where the oldest request 2788834SDavid.Valin@Sun.COM * is checked first. The search will continue until there is no more 2798834SDavid.Valin@Sun.COM * room on the msgq or we have checked all the waiters. 2804153Sdv142724 */ 2814153Sdv142724 2826071Sdv142724 static uint_t msg_type_hash(long); 2834153Sdv142724 static int msgq_check_err(kmsqid_t *qp, int cvres); 2844153Sdv142724 static int msg_rcvq_sleep(list_t *, msgq_wakeup_t *, kmutex_t **, 2854153Sdv142724 kmsqid_t *); 2864153Sdv142724 static int msg_copyout(kmsqid_t *, long, kmutex_t **, size_t *, size_t, 2874153Sdv142724 struct msg *, struct ipcmsgbuf *, int); 2884153Sdv142724 static void msg_rcvq_wakeup_all(list_t *); 2898834SDavid.Valin@Sun.COM static void msg_wakeup_senders(kmsqid_t *); 2904153Sdv142724 static void msg_wakeup_rdr(kmsqid_t *, msg_select_t **, long); 2914153Sdv142724 static msgq_wakeup_t *msg_fnd_any_snd(kmsqid_t *, int, long); 2924153Sdv142724 static msgq_wakeup_t *msg_fnd_any_rdr(kmsqid_t *, int, long); 2934153Sdv142724 static msgq_wakeup_t *msg_fnd_neg_snd(kmsqid_t *, int, long); 2944153Sdv142724 static msgq_wakeup_t *msg_fnd_spc_snd(kmsqid_t *, int, long); 2954153Sdv142724 static struct msg *msgrcv_lookup(kmsqid_t *, long); 2964153Sdv142724 2974153Sdv142724 msg_select_t msg_fnd_sndr[] = { 2984153Sdv142724 { msg_fnd_any_snd, &msg_fnd_sndr[1] }, 2994153Sdv142724 { msg_fnd_spc_snd, &msg_fnd_sndr[2] }, 3004153Sdv142724 { msg_fnd_neg_snd, &msg_fnd_sndr[0] } 3014153Sdv142724 }; 3024153Sdv142724 3034153Sdv142724 msg_select_t msg_fnd_rdr[1] = { 3044153Sdv142724 { msg_fnd_any_rdr, &msg_fnd_rdr[0] }, 3054153Sdv142724 }; 3064153Sdv142724 3070Sstevel@tonic-gate static struct modlinkage modlinkage = { 3080Sstevel@tonic-gate 
MODREV_1, 3090Sstevel@tonic-gate &modlsys, 3100Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 3110Sstevel@tonic-gate &modlsys32, 3120Sstevel@tonic-gate #endif 3130Sstevel@tonic-gate NULL 3140Sstevel@tonic-gate }; 3150Sstevel@tonic-gate 3168834SDavid.Valin@Sun.COM #define MSG_SMALL_INIT (size_t)-1 3170Sstevel@tonic-gate int 3180Sstevel@tonic-gate _init(void) 3190Sstevel@tonic-gate { 3200Sstevel@tonic-gate int result; 3210Sstevel@tonic-gate 3222677Sml93401 msq_svc = ipcs_create("msqids", rc_project_msgmni, rc_zone_msgmni, 3232677Sml93401 sizeof (kmsqid_t), msg_dtor, msg_rmid, AT_IPC_MSG, 3242677Sml93401 offsetof(ipc_rqty_t, ipcq_msgmni)); 3250Sstevel@tonic-gate zone_key_create(&msg_zone_key, NULL, msg_remove_zone, NULL); 3260Sstevel@tonic-gate 3270Sstevel@tonic-gate if ((result = mod_install(&modlinkage)) == 0) 3280Sstevel@tonic-gate return (0); 3290Sstevel@tonic-gate 3300Sstevel@tonic-gate (void) zone_key_delete(msg_zone_key); 3310Sstevel@tonic-gate ipcs_destroy(msq_svc); 3320Sstevel@tonic-gate 3330Sstevel@tonic-gate return (result); 3340Sstevel@tonic-gate } 3350Sstevel@tonic-gate 3360Sstevel@tonic-gate int 3370Sstevel@tonic-gate _fini(void) 3380Sstevel@tonic-gate { 3390Sstevel@tonic-gate return (EBUSY); 3400Sstevel@tonic-gate } 3410Sstevel@tonic-gate 3420Sstevel@tonic-gate int 3430Sstevel@tonic-gate _info(struct modinfo *modinfop) 3440Sstevel@tonic-gate { 3450Sstevel@tonic-gate return (mod_info(&modlinkage, modinfop)); 3460Sstevel@tonic-gate } 3470Sstevel@tonic-gate 3480Sstevel@tonic-gate static void 3490Sstevel@tonic-gate msg_dtor(kipc_perm_t *perm) 3500Sstevel@tonic-gate { 3510Sstevel@tonic-gate kmsqid_t *qp = (kmsqid_t *)perm; 3522565Sudpa int ii; 3530Sstevel@tonic-gate 3544153Sdv142724 for (ii = 0; ii <= MSG_MAX_QNUM; ii++) { 3554153Sdv142724 ASSERT(list_is_empty(&qp->msg_wait_snd[ii])); 3564153Sdv142724 ASSERT(list_is_empty(&qp->msg_wait_snd_ngt[ii])); 3574153Sdv142724 list_destroy(&qp->msg_wait_snd[ii]); 3584153Sdv142724 list_destroy(&qp->msg_wait_snd_ngt[ii]); 
3594153Sdv142724 } 3604153Sdv142724 ASSERT(list_is_empty(&qp->msg_cpy_block)); 3618834SDavid.Valin@Sun.COM ASSERT(list_is_empty(&qp->msg_wait_rcv)); 3624153Sdv142724 list_destroy(&qp->msg_cpy_block); 3630Sstevel@tonic-gate ASSERT(qp->msg_snd_cnt == 0); 3640Sstevel@tonic-gate ASSERT(qp->msg_cbytes == 0); 3650Sstevel@tonic-gate list_destroy(&qp->msg_list); 3668834SDavid.Valin@Sun.COM list_destroy(&qp->msg_wait_rcv); 3670Sstevel@tonic-gate } 3680Sstevel@tonic-gate 3690Sstevel@tonic-gate 3700Sstevel@tonic-gate #define msg_hold(mp) (mp)->msg_copycnt++ 3710Sstevel@tonic-gate 3720Sstevel@tonic-gate /* 3730Sstevel@tonic-gate * msg_rele - decrement the reference count on the message. When count 3740Sstevel@tonic-gate * reaches zero, free message header and contents. 3750Sstevel@tonic-gate */ 3760Sstevel@tonic-gate static void 3770Sstevel@tonic-gate msg_rele(struct msg *mp) 3780Sstevel@tonic-gate { 3790Sstevel@tonic-gate ASSERT(mp->msg_copycnt > 0); 3800Sstevel@tonic-gate if (mp->msg_copycnt-- == 1) { 3810Sstevel@tonic-gate if (mp->msg_addr) 3820Sstevel@tonic-gate kmem_free(mp->msg_addr, mp->msg_size); 3830Sstevel@tonic-gate kmem_free(mp, sizeof (struct msg)); 3840Sstevel@tonic-gate } 3850Sstevel@tonic-gate } 3860Sstevel@tonic-gate 3870Sstevel@tonic-gate /* 3880Sstevel@tonic-gate * msgunlink - Unlink msg from queue, decrement byte count and wake up anyone 3890Sstevel@tonic-gate * waiting for free bytes on queue. 3900Sstevel@tonic-gate * 3910Sstevel@tonic-gate * Called with queue locked. 
3920Sstevel@tonic-gate */ 3930Sstevel@tonic-gate static void 3940Sstevel@tonic-gate msgunlink(kmsqid_t *qp, struct msg *mp) 3950Sstevel@tonic-gate { 3960Sstevel@tonic-gate list_remove(&qp->msg_list, mp); 3970Sstevel@tonic-gate qp->msg_qnum--; 3980Sstevel@tonic-gate qp->msg_cbytes -= mp->msg_size; 3990Sstevel@tonic-gate msg_rele(mp); 4000Sstevel@tonic-gate 4010Sstevel@tonic-gate /* Wake up waiting writers */ 4028834SDavid.Valin@Sun.COM msg_wakeup_senders(qp); 4030Sstevel@tonic-gate } 4040Sstevel@tonic-gate 4050Sstevel@tonic-gate static void 4060Sstevel@tonic-gate msg_rmid(kipc_perm_t *perm) 4070Sstevel@tonic-gate { 4080Sstevel@tonic-gate kmsqid_t *qp = (kmsqid_t *)perm; 4090Sstevel@tonic-gate struct msg *mp; 4102565Sudpa int ii; 4110Sstevel@tonic-gate 4120Sstevel@tonic-gate 4130Sstevel@tonic-gate while ((mp = list_head(&qp->msg_list)) != NULL) 4140Sstevel@tonic-gate msgunlink(qp, mp); 4150Sstevel@tonic-gate ASSERT(qp->msg_cbytes == 0); 4160Sstevel@tonic-gate 4174153Sdv142724 /* 4184153Sdv142724 * Wake up everyone who is in a wait state of some sort 4194153Sdv142724 * for this message queue. 4204153Sdv142724 */ 4214153Sdv142724 for (ii = 0; ii <= MSG_MAX_QNUM; ii++) { 4224153Sdv142724 msg_rcvq_wakeup_all(&qp->msg_wait_snd[ii]); 4234153Sdv142724 msg_rcvq_wakeup_all(&qp->msg_wait_snd_ngt[ii]); 4242565Sudpa } 4254153Sdv142724 msg_rcvq_wakeup_all(&qp->msg_cpy_block); 4268834SDavid.Valin@Sun.COM msg_rcvq_wakeup_all(&qp->msg_wait_rcv); 4270Sstevel@tonic-gate } 4280Sstevel@tonic-gate 4290Sstevel@tonic-gate /* 4300Sstevel@tonic-gate * msgctl system call. 4310Sstevel@tonic-gate * 4320Sstevel@tonic-gate * gets q lock (via ipc_lookup), releases before return. 
4330Sstevel@tonic-gate * may call users of msg_lock 4340Sstevel@tonic-gate */ 4350Sstevel@tonic-gate static int 4360Sstevel@tonic-gate msgctl(int msgid, int cmd, void *arg) 4370Sstevel@tonic-gate { 4380Sstevel@tonic-gate STRUCT_DECL(msqid_ds, ds); /* SVR4 queue work area */ 4390Sstevel@tonic-gate kmsqid_t *qp; /* ptr to associated q */ 4404153Sdv142724 int error; 4410Sstevel@tonic-gate struct cred *cr; 4420Sstevel@tonic-gate model_t mdl = get_udatamodel(); 4430Sstevel@tonic-gate struct msqid_ds64 ds64; 4440Sstevel@tonic-gate kmutex_t *lock; 4450Sstevel@tonic-gate proc_t *pp = curproc; 4460Sstevel@tonic-gate 4470Sstevel@tonic-gate STRUCT_INIT(ds, mdl); 4480Sstevel@tonic-gate cr = CRED(); 4490Sstevel@tonic-gate 4500Sstevel@tonic-gate /* 4510Sstevel@tonic-gate * Perform pre- or non-lookup actions (e.g. copyins, RMID). 4520Sstevel@tonic-gate */ 4530Sstevel@tonic-gate switch (cmd) { 4540Sstevel@tonic-gate case IPC_SET: 4550Sstevel@tonic-gate if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds))) 4560Sstevel@tonic-gate return (set_errno(EFAULT)); 4570Sstevel@tonic-gate break; 4580Sstevel@tonic-gate 4590Sstevel@tonic-gate case IPC_SET64: 4600Sstevel@tonic-gate if (copyin(arg, &ds64, sizeof (struct msqid_ds64))) 4610Sstevel@tonic-gate return (set_errno(EFAULT)); 4620Sstevel@tonic-gate break; 4630Sstevel@tonic-gate 4640Sstevel@tonic-gate case IPC_RMID: 4650Sstevel@tonic-gate if (error = ipc_rmid(msq_svc, msgid, cr)) 4660Sstevel@tonic-gate return (set_errno(error)); 4670Sstevel@tonic-gate return (0); 4680Sstevel@tonic-gate } 4690Sstevel@tonic-gate 4700Sstevel@tonic-gate /* 4710Sstevel@tonic-gate * get msqid_ds for this msgid 4720Sstevel@tonic-gate */ 4730Sstevel@tonic-gate if ((lock = ipc_lookup(msq_svc, msgid, (kipc_perm_t **)&qp)) == NULL) 4740Sstevel@tonic-gate return (set_errno(EINVAL)); 4750Sstevel@tonic-gate 4760Sstevel@tonic-gate switch (cmd) { 4770Sstevel@tonic-gate case IPC_SET: 4780Sstevel@tonic-gate if (STRUCT_FGET(ds, msg_qbytes) > qp->msg_qbytes && 
4790Sstevel@tonic-gate secpolicy_ipc_config(cr) != 0) { 4800Sstevel@tonic-gate mutex_exit(lock); 4810Sstevel@tonic-gate return (set_errno(EPERM)); 4820Sstevel@tonic-gate } 4830Sstevel@tonic-gate if (error = ipcperm_set(msq_svc, cr, &qp->msg_perm, 4840Sstevel@tonic-gate &STRUCT_BUF(ds)->msg_perm, mdl)) { 4850Sstevel@tonic-gate mutex_exit(lock); 4860Sstevel@tonic-gate return (set_errno(error)); 4870Sstevel@tonic-gate } 4880Sstevel@tonic-gate qp->msg_qbytes = STRUCT_FGET(ds, msg_qbytes); 4890Sstevel@tonic-gate qp->msg_ctime = gethrestime_sec(); 4900Sstevel@tonic-gate break; 4910Sstevel@tonic-gate 4920Sstevel@tonic-gate case IPC_STAT: 4930Sstevel@tonic-gate if (error = ipcperm_access(&qp->msg_perm, MSG_R, cr)) { 4940Sstevel@tonic-gate mutex_exit(lock); 4950Sstevel@tonic-gate return (set_errno(error)); 4960Sstevel@tonic-gate } 4970Sstevel@tonic-gate 4984153Sdv142724 if (qp->msg_rcv_cnt) 4994153Sdv142724 qp->msg_perm.ipc_mode |= MSG_RWAIT; 5000Sstevel@tonic-gate if (qp->msg_snd_cnt) 5010Sstevel@tonic-gate qp->msg_perm.ipc_mode |= MSG_WWAIT; 5020Sstevel@tonic-gate ipcperm_stat(&STRUCT_BUF(ds)->msg_perm, &qp->msg_perm, mdl); 5030Sstevel@tonic-gate qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT); 5040Sstevel@tonic-gate STRUCT_FSETP(ds, msg_first, NULL); /* kernel addr */ 5050Sstevel@tonic-gate STRUCT_FSETP(ds, msg_last, NULL); 5060Sstevel@tonic-gate STRUCT_FSET(ds, msg_cbytes, qp->msg_cbytes); 5070Sstevel@tonic-gate STRUCT_FSET(ds, msg_qnum, qp->msg_qnum); 5080Sstevel@tonic-gate STRUCT_FSET(ds, msg_qbytes, qp->msg_qbytes); 5090Sstevel@tonic-gate STRUCT_FSET(ds, msg_lspid, qp->msg_lspid); 5100Sstevel@tonic-gate STRUCT_FSET(ds, msg_lrpid, qp->msg_lrpid); 5110Sstevel@tonic-gate STRUCT_FSET(ds, msg_stime, qp->msg_stime); 5120Sstevel@tonic-gate STRUCT_FSET(ds, msg_rtime, qp->msg_rtime); 5130Sstevel@tonic-gate STRUCT_FSET(ds, msg_ctime, qp->msg_ctime); 5140Sstevel@tonic-gate break; 5150Sstevel@tonic-gate 5160Sstevel@tonic-gate case IPC_SET64: 5170Sstevel@tonic-gate 
mutex_enter(&pp->p_lock); 5180Sstevel@tonic-gate if ((ds64.msgx_qbytes > qp->msg_qbytes) && 5190Sstevel@tonic-gate secpolicy_ipc_config(cr) != 0 && 5200Sstevel@tonic-gate rctl_test(rc_process_msgmnb, pp->p_rctls, pp, 5210Sstevel@tonic-gate ds64.msgx_qbytes, RCA_SAFE) & RCT_DENY) { 5220Sstevel@tonic-gate mutex_exit(&pp->p_lock); 5230Sstevel@tonic-gate mutex_exit(lock); 5240Sstevel@tonic-gate return (set_errno(EPERM)); 5250Sstevel@tonic-gate } 5260Sstevel@tonic-gate mutex_exit(&pp->p_lock); 5270Sstevel@tonic-gate if (error = ipcperm_set64(msq_svc, cr, &qp->msg_perm, 5280Sstevel@tonic-gate &ds64.msgx_perm)) { 5290Sstevel@tonic-gate mutex_exit(lock); 5300Sstevel@tonic-gate return (set_errno(error)); 5310Sstevel@tonic-gate } 5320Sstevel@tonic-gate qp->msg_qbytes = ds64.msgx_qbytes; 5330Sstevel@tonic-gate qp->msg_ctime = gethrestime_sec(); 5340Sstevel@tonic-gate break; 5350Sstevel@tonic-gate 5360Sstevel@tonic-gate case IPC_STAT64: 5374153Sdv142724 if (qp->msg_rcv_cnt) 5384153Sdv142724 qp->msg_perm.ipc_mode |= MSG_RWAIT; 5390Sstevel@tonic-gate if (qp->msg_snd_cnt) 5400Sstevel@tonic-gate qp->msg_perm.ipc_mode |= MSG_WWAIT; 5410Sstevel@tonic-gate ipcperm_stat64(&ds64.msgx_perm, &qp->msg_perm); 5420Sstevel@tonic-gate qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT); 5430Sstevel@tonic-gate ds64.msgx_cbytes = qp->msg_cbytes; 5440Sstevel@tonic-gate ds64.msgx_qnum = qp->msg_qnum; 5450Sstevel@tonic-gate ds64.msgx_qbytes = qp->msg_qbytes; 5460Sstevel@tonic-gate ds64.msgx_lspid = qp->msg_lspid; 5470Sstevel@tonic-gate ds64.msgx_lrpid = qp->msg_lrpid; 5480Sstevel@tonic-gate ds64.msgx_stime = qp->msg_stime; 5490Sstevel@tonic-gate ds64.msgx_rtime = qp->msg_rtime; 5500Sstevel@tonic-gate ds64.msgx_ctime = qp->msg_ctime; 5510Sstevel@tonic-gate break; 5520Sstevel@tonic-gate 5530Sstevel@tonic-gate default: 5540Sstevel@tonic-gate mutex_exit(lock); 5550Sstevel@tonic-gate return (set_errno(EINVAL)); 5560Sstevel@tonic-gate } 5570Sstevel@tonic-gate 5580Sstevel@tonic-gate mutex_exit(lock); 
5590Sstevel@tonic-gate 5600Sstevel@tonic-gate /* 5610Sstevel@tonic-gate * Do copyout last (after releasing mutex). 5620Sstevel@tonic-gate */ 5630Sstevel@tonic-gate switch (cmd) { 5640Sstevel@tonic-gate case IPC_STAT: 5650Sstevel@tonic-gate if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds))) 5660Sstevel@tonic-gate return (set_errno(EFAULT)); 5670Sstevel@tonic-gate break; 5680Sstevel@tonic-gate 5690Sstevel@tonic-gate case IPC_STAT64: 5700Sstevel@tonic-gate if (copyout(&ds64, arg, sizeof (struct msqid_ds64))) 5710Sstevel@tonic-gate return (set_errno(EFAULT)); 5720Sstevel@tonic-gate break; 5730Sstevel@tonic-gate } 5740Sstevel@tonic-gate 5750Sstevel@tonic-gate return (0); 5760Sstevel@tonic-gate } 5770Sstevel@tonic-gate 5780Sstevel@tonic-gate /* 5790Sstevel@tonic-gate * Remove all message queues associated with a given zone. Called by 5800Sstevel@tonic-gate * zone_shutdown when the zone is halted. 5810Sstevel@tonic-gate */ 5820Sstevel@tonic-gate /*ARGSUSED1*/ 5830Sstevel@tonic-gate static void 5840Sstevel@tonic-gate msg_remove_zone(zoneid_t zoneid, void *arg) 5850Sstevel@tonic-gate { 5860Sstevel@tonic-gate ipc_remove_zone(msq_svc, zoneid); 5870Sstevel@tonic-gate } 5880Sstevel@tonic-gate 5890Sstevel@tonic-gate /* 5900Sstevel@tonic-gate * msgget system call. 
5910Sstevel@tonic-gate */ 5920Sstevel@tonic-gate static int 5930Sstevel@tonic-gate msgget(key_t key, int msgflg) 5940Sstevel@tonic-gate { 5950Sstevel@tonic-gate kmsqid_t *qp; 5960Sstevel@tonic-gate kmutex_t *lock; 5970Sstevel@tonic-gate int id, error; 5982565Sudpa int ii; 5990Sstevel@tonic-gate proc_t *pp = curproc; 6000Sstevel@tonic-gate 6010Sstevel@tonic-gate top: 6020Sstevel@tonic-gate if (error = ipc_get(msq_svc, key, msgflg, (kipc_perm_t **)&qp, &lock)) 6030Sstevel@tonic-gate return (set_errno(error)); 6040Sstevel@tonic-gate 6050Sstevel@tonic-gate if (IPC_FREE(&qp->msg_perm)) { 6060Sstevel@tonic-gate mutex_exit(lock); 6070Sstevel@tonic-gate mutex_exit(&pp->p_lock); 6080Sstevel@tonic-gate 6090Sstevel@tonic-gate list_create(&qp->msg_list, sizeof (struct msg), 6100Sstevel@tonic-gate offsetof(struct msg, msg_node)); 6110Sstevel@tonic-gate qp->msg_qnum = 0; 6120Sstevel@tonic-gate qp->msg_lspid = qp->msg_lrpid = 0; 6130Sstevel@tonic-gate qp->msg_stime = qp->msg_rtime = 0; 6140Sstevel@tonic-gate qp->msg_ctime = gethrestime_sec(); 6154153Sdv142724 qp->msg_ngt_cnt = 0; 6164153Sdv142724 qp->msg_neg_copy = 0; 6174153Sdv142724 for (ii = 0; ii <= MSG_MAX_QNUM; ii++) { 6184153Sdv142724 list_create(&qp->msg_wait_snd[ii], 6194153Sdv142724 sizeof (msgq_wakeup_t), 6204153Sdv142724 offsetof(msgq_wakeup_t, msgw_list)); 6214153Sdv142724 list_create(&qp->msg_wait_snd_ngt[ii], 6224153Sdv142724 sizeof (msgq_wakeup_t), 6234153Sdv142724 offsetof(msgq_wakeup_t, msgw_list)); 6244153Sdv142724 } 6254153Sdv142724 /* 6264153Sdv142724 * The proper initialization of msg_lowest_type is to the 6274153Sdv142724 * highest possible value. By doing this we guarantee that 6284153Sdv142724 * when the first send happens, the lowest type will be set 6294153Sdv142724 * properly. 
6304153Sdv142724 */ 6318834SDavid.Valin@Sun.COM qp->msg_lowest_type = MSG_SMALL_INIT; 6324153Sdv142724 list_create(&qp->msg_cpy_block, 6334153Sdv142724 sizeof (msgq_wakeup_t), 6344153Sdv142724 offsetof(msgq_wakeup_t, msgw_list)); 6358834SDavid.Valin@Sun.COM list_create(&qp->msg_wait_rcv, 6368834SDavid.Valin@Sun.COM sizeof (msgq_wakeup_t), 6378834SDavid.Valin@Sun.COM offsetof(msgq_wakeup_t, msgw_list)); 6384153Sdv142724 qp->msg_fnd_sndr = &msg_fnd_sndr[0]; 6394153Sdv142724 qp->msg_fnd_rdr = &msg_fnd_rdr[0]; 6404153Sdv142724 qp->msg_rcv_cnt = 0; 6412565Sudpa qp->msg_snd_cnt = 0; 6428834SDavid.Valin@Sun.COM qp->msg_snd_smallest = MSG_SMALL_INIT; 6430Sstevel@tonic-gate 6440Sstevel@tonic-gate if (error = ipc_commit_begin(msq_svc, key, msgflg, 6450Sstevel@tonic-gate (kipc_perm_t *)qp)) { 6460Sstevel@tonic-gate if (error == EAGAIN) 6470Sstevel@tonic-gate goto top; 6480Sstevel@tonic-gate return (set_errno(error)); 6490Sstevel@tonic-gate } 6500Sstevel@tonic-gate qp->msg_qbytes = rctl_enforced_value(rc_process_msgmnb, 6510Sstevel@tonic-gate pp->p_rctls, pp); 6520Sstevel@tonic-gate qp->msg_qmax = rctl_enforced_value(rc_process_msgtql, 6530Sstevel@tonic-gate pp->p_rctls, pp); 6540Sstevel@tonic-gate lock = ipc_commit_end(msq_svc, &qp->msg_perm); 6550Sstevel@tonic-gate } 656*11861SMarek.Pospisil@Sun.COM 657*11861SMarek.Pospisil@Sun.COM if (AU_AUDITING()) 6580Sstevel@tonic-gate audit_ipcget(AT_IPC_MSG, (void *)qp); 659*11861SMarek.Pospisil@Sun.COM 6600Sstevel@tonic-gate id = qp->msg_perm.ipc_id; 6610Sstevel@tonic-gate mutex_exit(lock); 6620Sstevel@tonic-gate return (id); 6630Sstevel@tonic-gate } 6640Sstevel@tonic-gate 6650Sstevel@tonic-gate static ssize_t 6660Sstevel@tonic-gate msgrcv(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, long msgtyp, int msgflg) 6670Sstevel@tonic-gate { 6680Sstevel@tonic-gate struct msg *smp; /* ptr to best msg on q */ 6690Sstevel@tonic-gate kmsqid_t *qp; /* ptr to associated q */ 6700Sstevel@tonic-gate kmutex_t *lock; 6710Sstevel@tonic-gate size_t 
xtsz; /* transfer byte count */ 6724153Sdv142724 int error = 0; 6730Sstevel@tonic-gate int cvres; 6746071Sdv142724 uint_t msg_hash; 6754153Sdv142724 msgq_wakeup_t msg_entry; 6760Sstevel@tonic-gate 6770Sstevel@tonic-gate CPU_STATS_ADDQ(CPU, sys, msg, 1); /* bump msg send/rcv count */ 6780Sstevel@tonic-gate 6794153Sdv142724 msg_hash = msg_type_hash(msgtyp); 6804153Sdv142724 if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL) { 6810Sstevel@tonic-gate return ((ssize_t)set_errno(EINVAL)); 6824153Sdv142724 } 6830Sstevel@tonic-gate ipc_hold(msq_svc, (kipc_perm_t *)qp); 6840Sstevel@tonic-gate 6854153Sdv142724 if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED())) { 6860Sstevel@tonic-gate goto msgrcv_out; 6874153Sdv142724 } 6884153Sdv142724 6894153Sdv142724 /* 6904153Sdv142724 * Various information (including the condvar_t) required for the 6914153Sdv142724 * process to sleep is provided by it's stack. 6924153Sdv142724 */ 6934153Sdv142724 msg_entry.msgw_thrd = curthread; 6944153Sdv142724 msg_entry.msgw_snd_wake = 0; 6954153Sdv142724 msg_entry.msgw_type = msgtyp; 6964153Sdv142724 findmsg: 6974153Sdv142724 smp = msgrcv_lookup(qp, msgtyp); 6984153Sdv142724 6994153Sdv142724 if (smp) { 7004153Sdv142724 /* 7014153Sdv142724 * We found a possible message to copy out. 7024153Sdv142724 */ 7034153Sdv142724 if ((smp->msg_flags & MSG_RCVCOPY) == 0) { 7046071Sdv142724 long t = msg_entry.msgw_snd_wake; 7056147Sqiao long copy_type = smp->msg_type; 7066147Sqiao 7074153Sdv142724 /* 7084153Sdv142724 * It is available, attempt to copy it. 7094153Sdv142724 */ 7104153Sdv142724 error = msg_copyout(qp, msgtyp, &lock, &xtsz, msgsz, 7114153Sdv142724 smp, msgp, msgflg); 7126071Sdv142724 7136071Sdv142724 /* 7146071Sdv142724 * It is possible to consume a different message 7156071Sdv142724 * type then what originally awakened for (negative 7166071Sdv142724 * types). 
If this happens a check must be done to 7176071Sdv142724 * to determine if another receiver is available 7186071Sdv142724 * for the waking message type, Failure to do this 7196071Sdv142724 * can result in a message on the queue that can be 7206071Sdv142724 * serviced by a sleeping receiver. 7216071Sdv142724 */ 7226147Sqiao if (!error && t && (copy_type != t)) 7236071Sdv142724 msg_wakeup_rdr(qp, &qp->msg_fnd_sndr, t); 7246071Sdv142724 7254153Sdv142724 /* 7264153Sdv142724 * Don't forget to wakeup a sleeper that blocked because 7274153Sdv142724 * we were copying things out. 7284153Sdv142724 */ 7294153Sdv142724 msg_wakeup_rdr(qp, &qp->msg_fnd_rdr, 0); 7304153Sdv142724 goto msgrcv_out; 7314153Sdv142724 } 7324153Sdv142724 /* 7334153Sdv142724 * The selected message is being copied out, so block. We do 7344153Sdv142724 * not need to wake the next person up on the msg_cpy_block list 7354153Sdv142724 * due to the fact some one is copying out and they will get 7364153Sdv142724 * things moving again once the copy is completed. 7374153Sdv142724 */ 7384153Sdv142724 cvres = msg_rcvq_sleep(&qp->msg_cpy_block, 7394153Sdv142724 &msg_entry, &lock, qp); 7404153Sdv142724 error = msgq_check_err(qp, cvres); 7414153Sdv142724 if (error) { 7424153Sdv142724 goto msgrcv_out; 7434153Sdv142724 } 7444153Sdv142724 goto findmsg; 7454153Sdv142724 } 7464153Sdv142724 /* 7474153Sdv142724 * There isn't a message to copy out that matches the designated 7484153Sdv142724 * criteria. 7494153Sdv142724 */ 7504153Sdv142724 if (msgflg & IPC_NOWAIT) { 7514153Sdv142724 error = ENOMSG; 7524153Sdv142724 goto msgrcv_out; 7534153Sdv142724 } 7544153Sdv142724 msg_wakeup_rdr(qp, &qp->msg_fnd_rdr, 0); 7554153Sdv142724 7564153Sdv142724 /* 7574153Sdv142724 * Wait for new message. We keep the negative and positive types 7584153Sdv142724 * separate for performance reasons. 
7594153Sdv142724 */ 7604153Sdv142724 msg_entry.msgw_snd_wake = 0; 7614153Sdv142724 if (msgtyp >= 0) { 7624153Sdv142724 cvres = msg_rcvq_sleep(&qp->msg_wait_snd[msg_hash], 7634153Sdv142724 &msg_entry, &lock, qp); 7644153Sdv142724 } else { 7654153Sdv142724 qp->msg_ngt_cnt++; 7664153Sdv142724 cvres = msg_rcvq_sleep(&qp->msg_wait_snd_ngt[msg_hash], 7674153Sdv142724 &msg_entry, &lock, qp); 7684153Sdv142724 qp->msg_ngt_cnt--; 7694153Sdv142724 } 7704153Sdv142724 7714153Sdv142724 if (!(error = msgq_check_err(qp, cvres))) { 7724153Sdv142724 goto findmsg; 7734153Sdv142724 } 7744153Sdv142724 7754153Sdv142724 msgrcv_out: 7764153Sdv142724 if (error) { 7774153Sdv142724 msg_wakeup_rdr(qp, &qp->msg_fnd_rdr, 0); 7784153Sdv142724 if (msg_entry.msgw_snd_wake) { 7794153Sdv142724 msg_wakeup_rdr(qp, &qp->msg_fnd_sndr, 7804153Sdv142724 msg_entry.msgw_snd_wake); 7814153Sdv142724 } 7824153Sdv142724 ipc_rele(msq_svc, (kipc_perm_t *)qp); 7834153Sdv142724 return ((ssize_t)set_errno(error)); 7844153Sdv142724 } 7854153Sdv142724 ipc_rele(msq_svc, (kipc_perm_t *)qp); 7864153Sdv142724 return ((ssize_t)xtsz); 7874153Sdv142724 } 7880Sstevel@tonic-gate 7894153Sdv142724 static int 7904153Sdv142724 msgq_check_err(kmsqid_t *qp, int cvres) 7914153Sdv142724 { 7924153Sdv142724 if (IPC_FREE(&qp->msg_perm)) { 7934153Sdv142724 return (EIDRM); 7944153Sdv142724 } 7954153Sdv142724 7964153Sdv142724 if (cvres == 0) { 7974153Sdv142724 return (EINTR); 7984153Sdv142724 } 7994153Sdv142724 8004153Sdv142724 return (0); 8014153Sdv142724 } 8024153Sdv142724 8034153Sdv142724 static int 8044153Sdv142724 msg_copyout(kmsqid_t *qp, long msgtyp, kmutex_t **lock, size_t *xtsz_ret, 8054153Sdv142724 size_t msgsz, struct msg *smp, struct ipcmsgbuf *msgp, int msgflg) 8064153Sdv142724 { 8074153Sdv142724 size_t xtsz; 8084153Sdv142724 STRUCT_HANDLE(ipcmsgbuf, umsgp); 8094153Sdv142724 model_t mdl = get_udatamodel(); 8104153Sdv142724 int copyerror = 0; 8114153Sdv142724 8124153Sdv142724 STRUCT_SET_HANDLE(umsgp, mdl, msgp); 8134153Sdv142724 
if (msgsz < smp->msg_size) { 8144153Sdv142724 if ((msgflg & MSG_NOERROR) == 0) { 8154153Sdv142724 return (E2BIG); 8164153Sdv142724 } else { 8174153Sdv142724 xtsz = msgsz; 8184153Sdv142724 } 8194153Sdv142724 } else { 8204153Sdv142724 xtsz = smp->msg_size; 8214153Sdv142724 } 8224153Sdv142724 *xtsz_ret = xtsz; 8234153Sdv142724 8244153Sdv142724 /* 8254153Sdv142724 * To prevent a DOS attack we mark the message as being 8264153Sdv142724 * copied out and release mutex. When the copy is completed 8274153Sdv142724 * we need to acquire the mutex and make the appropriate updates. 8284153Sdv142724 */ 8294153Sdv142724 ASSERT((smp->msg_flags & MSG_RCVCOPY) == 0); 8304153Sdv142724 smp->msg_flags |= MSG_RCVCOPY; 8314153Sdv142724 msg_hold(smp); 8324153Sdv142724 if (msgtyp < 0) { 8334153Sdv142724 ASSERT(qp->msg_neg_copy == 0); 8344153Sdv142724 qp->msg_neg_copy = 1; 8354153Sdv142724 } 8364153Sdv142724 mutex_exit(*lock); 8374153Sdv142724 8384153Sdv142724 if (mdl == DATAMODEL_NATIVE) { 8394153Sdv142724 copyerror = copyout(&smp->msg_type, msgp, 8404153Sdv142724 sizeof (smp->msg_type)); 8414153Sdv142724 } else { 8424153Sdv142724 /* 8434153Sdv142724 * 32-bit callers need an imploded msg type. 8444153Sdv142724 */ 8454153Sdv142724 int32_t msg_type32 = smp->msg_type; 8464153Sdv142724 8474153Sdv142724 copyerror = copyout(&msg_type32, msgp, 8484153Sdv142724 sizeof (msg_type32)); 8494153Sdv142724 } 8504153Sdv142724 8514153Sdv142724 if (copyerror == 0 && xtsz) { 8524153Sdv142724 copyerror = copyout(smp->msg_addr, 8534153Sdv142724 STRUCT_FADDR(umsgp, mtext), xtsz); 8544153Sdv142724 } 8554153Sdv142724 8564153Sdv142724 /* 8574153Sdv142724 * Reclaim the mutex and make sure the message queue still exists. 
8584153Sdv142724 */ 8594153Sdv142724 8604153Sdv142724 *lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id); 8614153Sdv142724 if (msgtyp < 0) { 8624153Sdv142724 qp->msg_neg_copy = 0; 8634153Sdv142724 } 8644153Sdv142724 ASSERT(smp->msg_flags & MSG_RCVCOPY); 8654153Sdv142724 smp->msg_flags &= ~MSG_RCVCOPY; 8664153Sdv142724 msg_rele(smp); 8674153Sdv142724 if (IPC_FREE(&qp->msg_perm)) { 8684153Sdv142724 return (EIDRM); 8694153Sdv142724 } 8704153Sdv142724 if (copyerror) { 8714153Sdv142724 return (EFAULT); 8724153Sdv142724 } 8734153Sdv142724 qp->msg_lrpid = ttoproc(curthread)->p_pid; 8744153Sdv142724 qp->msg_rtime = gethrestime_sec(); 8754153Sdv142724 msgunlink(qp, smp); 8764153Sdv142724 return (0); 8774153Sdv142724 } 8784153Sdv142724 8794153Sdv142724 static struct msg * 8804153Sdv142724 msgrcv_lookup(kmsqid_t *qp, long msgtyp) 8814153Sdv142724 { 8824153Sdv142724 struct msg *smp = NULL; 8836071Sdv142724 long qp_low; 8844153Sdv142724 struct msg *mp; /* ptr to msg on q */ 8856071Sdv142724 long low_msgtype; 8864153Sdv142724 static struct msg neg_copy_smp; 8874153Sdv142724 8880Sstevel@tonic-gate mp = list_head(&qp->msg_list); 8890Sstevel@tonic-gate if (msgtyp == 0) { 8900Sstevel@tonic-gate smp = mp; 8910Sstevel@tonic-gate } else { 8924153Sdv142724 qp_low = qp->msg_lowest_type; 8934153Sdv142724 if (msgtyp > 0) { 8944153Sdv142724 /* 8954153Sdv142724 * If our lowest possible message type is larger than 8964153Sdv142724 * the message type desired, then we know there is 8974153Sdv142724 * no entry present. 
8984153Sdv142724 */ 8994153Sdv142724 if (qp_low > msgtyp) { 9004153Sdv142724 return (NULL); 9014153Sdv142724 } 9024153Sdv142724 9034153Sdv142724 for (; mp; mp = list_next(&qp->msg_list, mp)) { 9044153Sdv142724 if (msgtyp == mp->msg_type) { 9054153Sdv142724 smp = mp; 9064153Sdv142724 break; 9074153Sdv142724 } 9080Sstevel@tonic-gate } 9094153Sdv142724 } else { 9104153Sdv142724 /* 9114153Sdv142724 * We have kept track of the lowest possible message 9124153Sdv142724 * type on the send queue. This allows us to terminate 9134153Sdv142724 * the search early if we find a message type of that 9144153Sdv142724 * type. Note, the lowest type may not be the actual 9154153Sdv142724 * lowest value in the system, it is only guaranteed 9164153Sdv142724 * that there isn't a value lower than that. 9174153Sdv142724 */ 9184153Sdv142724 low_msgtype = -msgtyp; 9196071Sdv142724 if (low_msgtype < qp_low) { 9204153Sdv142724 return (NULL); 9214153Sdv142724 } 9224153Sdv142724 if (qp->msg_neg_copy) { 9234153Sdv142724 neg_copy_smp.msg_flags = MSG_RCVCOPY; 9244153Sdv142724 return (&neg_copy_smp); 9254153Sdv142724 } 9264153Sdv142724 for (; mp; mp = list_next(&qp->msg_list, mp)) { 9276071Sdv142724 if (mp->msg_type <= low_msgtype && 9286071Sdv142724 !(smp && smp->msg_type <= mp->msg_type)) { 9294153Sdv142724 smp = mp; 9304153Sdv142724 low_msgtype = mp->msg_type; 9314153Sdv142724 if (low_msgtype == qp_low) { 9324153Sdv142724 break; 9334153Sdv142724 } 9344153Sdv142724 } 9354153Sdv142724 } 9364153Sdv142724 if (smp) { 9374153Sdv142724 /* 9384153Sdv142724 * Update the lowest message type. 9394153Sdv142724 */ 9404153Sdv142724 qp->msg_lowest_type = smp->msg_type; 9410Sstevel@tonic-gate } 9420Sstevel@tonic-gate } 9430Sstevel@tonic-gate } 9444153Sdv142724 return (smp); 9450Sstevel@tonic-gate } 9460Sstevel@tonic-gate 9470Sstevel@tonic-gate /* 9480Sstevel@tonic-gate * msgids system call. 
9490Sstevel@tonic-gate */ 9500Sstevel@tonic-gate static int 9510Sstevel@tonic-gate msgids(int *buf, uint_t nids, uint_t *pnids) 9520Sstevel@tonic-gate { 9530Sstevel@tonic-gate int error; 9540Sstevel@tonic-gate 9550Sstevel@tonic-gate if (error = ipc_ids(msq_svc, buf, nids, pnids)) 9560Sstevel@tonic-gate return (set_errno(error)); 9570Sstevel@tonic-gate 9580Sstevel@tonic-gate return (0); 9590Sstevel@tonic-gate } 9600Sstevel@tonic-gate 9610Sstevel@tonic-gate #define RND(x) roundup((x), sizeof (size_t)) 9620Sstevel@tonic-gate #define RND32(x) roundup((x), sizeof (size32_t)) 9630Sstevel@tonic-gate 9640Sstevel@tonic-gate /* 9650Sstevel@tonic-gate * msgsnap system call. 9660Sstevel@tonic-gate */ 9670Sstevel@tonic-gate static int 9680Sstevel@tonic-gate msgsnap(int msqid, caddr_t buf, size_t bufsz, long msgtyp) 9690Sstevel@tonic-gate { 9700Sstevel@tonic-gate struct msg *mp; /* ptr to msg on q */ 9710Sstevel@tonic-gate kmsqid_t *qp; /* ptr to associated q */ 9720Sstevel@tonic-gate kmutex_t *lock; 9730Sstevel@tonic-gate size_t size; 9740Sstevel@tonic-gate size_t nmsg; 9750Sstevel@tonic-gate struct msg **snaplist; 9760Sstevel@tonic-gate int error, i; 9770Sstevel@tonic-gate model_t mdl = get_udatamodel(); 9780Sstevel@tonic-gate STRUCT_DECL(msgsnap_head, head); 9790Sstevel@tonic-gate STRUCT_DECL(msgsnap_mhead, mhead); 9800Sstevel@tonic-gate 9810Sstevel@tonic-gate STRUCT_INIT(head, mdl); 9820Sstevel@tonic-gate STRUCT_INIT(mhead, mdl); 9830Sstevel@tonic-gate 9840Sstevel@tonic-gate if (bufsz < STRUCT_SIZE(head)) 9850Sstevel@tonic-gate return (set_errno(EINVAL)); 9860Sstevel@tonic-gate 9870Sstevel@tonic-gate if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL) 9880Sstevel@tonic-gate return (set_errno(EINVAL)); 9890Sstevel@tonic-gate 9900Sstevel@tonic-gate if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED())) { 9910Sstevel@tonic-gate mutex_exit(lock); 9920Sstevel@tonic-gate return (set_errno(error)); 9930Sstevel@tonic-gate } 9940Sstevel@tonic-gate 
ipc_hold(msq_svc, (kipc_perm_t *)qp); 9950Sstevel@tonic-gate 9960Sstevel@tonic-gate /* 9970Sstevel@tonic-gate * First compute the required buffer size and 9980Sstevel@tonic-gate * the number of messages on the queue. 9990Sstevel@tonic-gate */ 10000Sstevel@tonic-gate size = nmsg = 0; 10010Sstevel@tonic-gate for (mp = list_head(&qp->msg_list); mp; 10020Sstevel@tonic-gate mp = list_next(&qp->msg_list, mp)) { 10030Sstevel@tonic-gate if (msgtyp == 0 || 10040Sstevel@tonic-gate (msgtyp > 0 && msgtyp == mp->msg_type) || 10050Sstevel@tonic-gate (msgtyp < 0 && mp->msg_type <= -msgtyp)) { 10060Sstevel@tonic-gate nmsg++; 10070Sstevel@tonic-gate if (mdl == DATAMODEL_NATIVE) 10080Sstevel@tonic-gate size += RND(mp->msg_size); 10090Sstevel@tonic-gate else 10100Sstevel@tonic-gate size += RND32(mp->msg_size); 10110Sstevel@tonic-gate } 10120Sstevel@tonic-gate } 10130Sstevel@tonic-gate 10140Sstevel@tonic-gate size += STRUCT_SIZE(head) + nmsg * STRUCT_SIZE(mhead); 10150Sstevel@tonic-gate if (size > bufsz) 10160Sstevel@tonic-gate nmsg = 0; 10170Sstevel@tonic-gate 10180Sstevel@tonic-gate if (nmsg > 0) { 10190Sstevel@tonic-gate /* 10200Sstevel@tonic-gate * Mark the messages as being copied. 
10210Sstevel@tonic-gate */ 10220Sstevel@tonic-gate snaplist = (struct msg **)kmem_alloc(nmsg * 10230Sstevel@tonic-gate sizeof (struct msg *), KM_SLEEP); 10240Sstevel@tonic-gate i = 0; 10250Sstevel@tonic-gate for (mp = list_head(&qp->msg_list); mp; 10260Sstevel@tonic-gate mp = list_next(&qp->msg_list, mp)) { 10270Sstevel@tonic-gate if (msgtyp == 0 || 10280Sstevel@tonic-gate (msgtyp > 0 && msgtyp == mp->msg_type) || 10290Sstevel@tonic-gate (msgtyp < 0 && mp->msg_type <= -msgtyp)) { 10300Sstevel@tonic-gate msg_hold(mp); 10310Sstevel@tonic-gate snaplist[i] = mp; 10320Sstevel@tonic-gate i++; 10330Sstevel@tonic-gate } 10340Sstevel@tonic-gate } 10350Sstevel@tonic-gate } 10360Sstevel@tonic-gate mutex_exit(lock); 10370Sstevel@tonic-gate 10380Sstevel@tonic-gate /* 10390Sstevel@tonic-gate * Copy out the buffer header. 10400Sstevel@tonic-gate */ 10410Sstevel@tonic-gate STRUCT_FSET(head, msgsnap_size, size); 10420Sstevel@tonic-gate STRUCT_FSET(head, msgsnap_nmsg, nmsg); 10430Sstevel@tonic-gate if (copyout(STRUCT_BUF(head), buf, STRUCT_SIZE(head))) 10440Sstevel@tonic-gate error = EFAULT; 10450Sstevel@tonic-gate 10460Sstevel@tonic-gate buf += STRUCT_SIZE(head); 10470Sstevel@tonic-gate 10480Sstevel@tonic-gate /* 10490Sstevel@tonic-gate * Now copy out the messages one by one. 
10500Sstevel@tonic-gate */ 10510Sstevel@tonic-gate for (i = 0; i < nmsg; i++) { 10520Sstevel@tonic-gate mp = snaplist[i]; 10530Sstevel@tonic-gate if (error == 0) { 10540Sstevel@tonic-gate STRUCT_FSET(mhead, msgsnap_mlen, mp->msg_size); 10550Sstevel@tonic-gate STRUCT_FSET(mhead, msgsnap_mtype, mp->msg_type); 10560Sstevel@tonic-gate if (copyout(STRUCT_BUF(mhead), buf, STRUCT_SIZE(mhead))) 10570Sstevel@tonic-gate error = EFAULT; 10580Sstevel@tonic-gate buf += STRUCT_SIZE(mhead); 10590Sstevel@tonic-gate 10600Sstevel@tonic-gate if (error == 0 && 10610Sstevel@tonic-gate mp->msg_size != 0 && 10620Sstevel@tonic-gate copyout(mp->msg_addr, buf, mp->msg_size)) 10630Sstevel@tonic-gate error = EFAULT; 10640Sstevel@tonic-gate if (mdl == DATAMODEL_NATIVE) 10650Sstevel@tonic-gate buf += RND(mp->msg_size); 10660Sstevel@tonic-gate else 10670Sstevel@tonic-gate buf += RND32(mp->msg_size); 10680Sstevel@tonic-gate } 10690Sstevel@tonic-gate lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id); 10700Sstevel@tonic-gate msg_rele(mp); 10710Sstevel@tonic-gate /* Check for msg q deleted or reallocated */ 10720Sstevel@tonic-gate if (IPC_FREE(&qp->msg_perm)) 10730Sstevel@tonic-gate error = EIDRM; 10740Sstevel@tonic-gate mutex_exit(lock); 10750Sstevel@tonic-gate } 10760Sstevel@tonic-gate 10770Sstevel@tonic-gate (void) ipc_lock(msq_svc, qp->msg_perm.ipc_id); 10780Sstevel@tonic-gate ipc_rele(msq_svc, (kipc_perm_t *)qp); 10790Sstevel@tonic-gate 10800Sstevel@tonic-gate if (nmsg > 0) 10810Sstevel@tonic-gate kmem_free(snaplist, nmsg * sizeof (struct msg *)); 10820Sstevel@tonic-gate 10830Sstevel@tonic-gate if (error) 10840Sstevel@tonic-gate return (set_errno(error)); 10850Sstevel@tonic-gate return (0); 10860Sstevel@tonic-gate } 10870Sstevel@tonic-gate 10882983Sdv142724 #define MSG_PREALLOC_LIMIT 8192 10892983Sdv142724 10900Sstevel@tonic-gate /* 10910Sstevel@tonic-gate * msgsnd system call. 
10920Sstevel@tonic-gate */ 10930Sstevel@tonic-gate static int 10940Sstevel@tonic-gate msgsnd(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, int msgflg) 10950Sstevel@tonic-gate { 10960Sstevel@tonic-gate kmsqid_t *qp; 10972983Sdv142724 kmutex_t *lock = NULL; 10980Sstevel@tonic-gate struct msg *mp = NULL; 10990Sstevel@tonic-gate long type; 11008834SDavid.Valin@Sun.COM int error = 0, wait_wakeup = 0; 11018834SDavid.Valin@Sun.COM msgq_wakeup_t msg_entry; 11020Sstevel@tonic-gate model_t mdl = get_udatamodel(); 11030Sstevel@tonic-gate STRUCT_HANDLE(ipcmsgbuf, umsgp); 11040Sstevel@tonic-gate 11050Sstevel@tonic-gate CPU_STATS_ADDQ(CPU, sys, msg, 1); /* bump msg send/rcv count */ 11060Sstevel@tonic-gate STRUCT_SET_HANDLE(umsgp, mdl, msgp); 11070Sstevel@tonic-gate 11080Sstevel@tonic-gate if (mdl == DATAMODEL_NATIVE) { 11090Sstevel@tonic-gate if (copyin(msgp, &type, sizeof (type))) 11100Sstevel@tonic-gate return (set_errno(EFAULT)); 11110Sstevel@tonic-gate } else { 11120Sstevel@tonic-gate int32_t type32; 11130Sstevel@tonic-gate if (copyin(msgp, &type32, sizeof (type32))) 11140Sstevel@tonic-gate return (set_errno(EFAULT)); 11150Sstevel@tonic-gate type = type32; 11160Sstevel@tonic-gate } 11170Sstevel@tonic-gate 11180Sstevel@tonic-gate if (type < 1) 11190Sstevel@tonic-gate return (set_errno(EINVAL)); 11200Sstevel@tonic-gate 11212983Sdv142724 /* 11222983Sdv142724 * We want the value here large enough that most of the 11232983Sdv142724 * the message operations will use the "lockless" path, 11242983Sdv142724 * but small enough that a user can not reserve large 11252983Sdv142724 * chunks of kernel memory unless they have a valid 11262983Sdv142724 * reason to. 11272983Sdv142724 */ 11282983Sdv142724 if (msgsz <= MSG_PREALLOC_LIMIT) { 11292983Sdv142724 /* 11302983Sdv142724 * We are small enough that we can afford to do the 11312983Sdv142724 * allocation now. This saves dropping the lock 11322983Sdv142724 * and then reacquiring the lock. 
11332983Sdv142724 */ 11342983Sdv142724 mp = kmem_zalloc(sizeof (struct msg), KM_SLEEP); 11352983Sdv142724 mp->msg_copycnt = 1; 11362983Sdv142724 mp->msg_size = msgsz; 11372983Sdv142724 if (msgsz) { 11382983Sdv142724 mp->msg_addr = kmem_alloc(msgsz, KM_SLEEP); 11392983Sdv142724 if (copyin(STRUCT_FADDR(umsgp, mtext), 11402983Sdv142724 mp->msg_addr, msgsz) == -1) { 11412983Sdv142724 error = EFAULT; 11422983Sdv142724 goto msgsnd_out; 11432983Sdv142724 } 11442983Sdv142724 } 11452983Sdv142724 } 11462983Sdv142724 11472983Sdv142724 if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL) { 11482983Sdv142724 error = EINVAL; 11492983Sdv142724 goto msgsnd_out; 11502983Sdv142724 } 11512983Sdv142724 11520Sstevel@tonic-gate ipc_hold(msq_svc, (kipc_perm_t *)qp); 11530Sstevel@tonic-gate 11540Sstevel@tonic-gate if (msgsz > qp->msg_qbytes) { 11550Sstevel@tonic-gate error = EINVAL; 11560Sstevel@tonic-gate goto msgsnd_out; 11570Sstevel@tonic-gate } 11580Sstevel@tonic-gate 11590Sstevel@tonic-gate if (error = ipcperm_access(&qp->msg_perm, MSG_W, CRED())) 11600Sstevel@tonic-gate goto msgsnd_out; 11610Sstevel@tonic-gate 11620Sstevel@tonic-gate top: 11630Sstevel@tonic-gate /* 11640Sstevel@tonic-gate * Allocate space on q, message header, & buffer space. 
11650Sstevel@tonic-gate */ 11660Sstevel@tonic-gate ASSERT(qp->msg_qnum <= qp->msg_qmax); 11670Sstevel@tonic-gate while ((msgsz > qp->msg_qbytes - qp->msg_cbytes) || 11680Sstevel@tonic-gate (qp->msg_qnum == qp->msg_qmax)) { 11690Sstevel@tonic-gate int cvres; 11700Sstevel@tonic-gate 11710Sstevel@tonic-gate if (msgflg & IPC_NOWAIT) { 11720Sstevel@tonic-gate error = EAGAIN; 11730Sstevel@tonic-gate goto msgsnd_out; 11740Sstevel@tonic-gate } 11750Sstevel@tonic-gate 11768834SDavid.Valin@Sun.COM wait_wakeup = 0; 11770Sstevel@tonic-gate qp->msg_snd_cnt++; 11788834SDavid.Valin@Sun.COM msg_entry.msgw_snd_size = msgsz; 11798834SDavid.Valin@Sun.COM msg_entry.msgw_thrd = curthread; 11808834SDavid.Valin@Sun.COM msg_entry.msgw_type = type; 11818834SDavid.Valin@Sun.COM cv_init(&msg_entry.msgw_wake_cv, NULL, 0, NULL); 11828834SDavid.Valin@Sun.COM list_insert_tail(&qp->msg_wait_rcv, &msg_entry); 11838834SDavid.Valin@Sun.COM if (qp->msg_snd_smallest > msgsz) 11848834SDavid.Valin@Sun.COM qp->msg_snd_smallest = msgsz; 11858834SDavid.Valin@Sun.COM cvres = cv_wait_sig(&msg_entry.msgw_wake_cv, lock); 11860Sstevel@tonic-gate lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, lock); 11870Sstevel@tonic-gate qp->msg_snd_cnt--; 11888834SDavid.Valin@Sun.COM if (list_link_active(&msg_entry.msgw_list)) 11898834SDavid.Valin@Sun.COM list_remove(&qp->msg_wait_rcv, &msg_entry); 11904153Sdv142724 if (error = msgq_check_err(qp, cvres)) { 11910Sstevel@tonic-gate goto msgsnd_out; 11920Sstevel@tonic-gate } 11938834SDavid.Valin@Sun.COM wait_wakeup = 1; 11940Sstevel@tonic-gate } 11950Sstevel@tonic-gate 11960Sstevel@tonic-gate if (mp == NULL) { 11970Sstevel@tonic-gate int failure; 11980Sstevel@tonic-gate 11990Sstevel@tonic-gate mutex_exit(lock); 12002983Sdv142724 ASSERT(msgsz > 0); 12010Sstevel@tonic-gate mp = kmem_zalloc(sizeof (struct msg), KM_SLEEP); 12022983Sdv142724 mp->msg_addr = kmem_alloc(msgsz, KM_SLEEP); 12030Sstevel@tonic-gate mp->msg_size = msgsz; 12040Sstevel@tonic-gate mp->msg_copycnt = 1; 
12050Sstevel@tonic-gate 12062983Sdv142724 failure = (copyin(STRUCT_FADDR(umsgp, mtext), 12070Sstevel@tonic-gate mp->msg_addr, msgsz) == -1); 12080Sstevel@tonic-gate lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id); 12090Sstevel@tonic-gate if (IPC_FREE(&qp->msg_perm)) { 12100Sstevel@tonic-gate error = EIDRM; 12110Sstevel@tonic-gate goto msgsnd_out; 12120Sstevel@tonic-gate } 12130Sstevel@tonic-gate if (failure) { 12140Sstevel@tonic-gate error = EFAULT; 12150Sstevel@tonic-gate goto msgsnd_out; 12160Sstevel@tonic-gate } 12170Sstevel@tonic-gate goto top; 12180Sstevel@tonic-gate } 12190Sstevel@tonic-gate 12200Sstevel@tonic-gate /* 12210Sstevel@tonic-gate * Everything is available, put msg on q. 12220Sstevel@tonic-gate */ 12230Sstevel@tonic-gate qp->msg_qnum++; 12240Sstevel@tonic-gate qp->msg_cbytes += msgsz; 12250Sstevel@tonic-gate qp->msg_lspid = curproc->p_pid; 12260Sstevel@tonic-gate qp->msg_stime = gethrestime_sec(); 12270Sstevel@tonic-gate mp->msg_type = type; 12284153Sdv142724 if (qp->msg_lowest_type > type) 12294153Sdv142724 qp->msg_lowest_type = type; 12300Sstevel@tonic-gate list_insert_tail(&qp->msg_list, mp); 12312565Sudpa /* 12324153Sdv142724 * Get the proper receiver going. 12332565Sudpa */ 12344153Sdv142724 msg_wakeup_rdr(qp, &qp->msg_fnd_sndr, type); 12350Sstevel@tonic-gate 12360Sstevel@tonic-gate msgsnd_out: 12378834SDavid.Valin@Sun.COM /* 12388834SDavid.Valin@Sun.COM * We were woken up from the send wait list, but an 12398834SDavid.Valin@Sun.COM * an error occured on placing the message onto the 12408834SDavid.Valin@Sun.COM * msg queue. Given that, we need to do the wakeup 12418834SDavid.Valin@Sun.COM * dance again. 
12428834SDavid.Valin@Sun.COM */ 12438834SDavid.Valin@Sun.COM 12448834SDavid.Valin@Sun.COM if (wait_wakeup && error) { 12458834SDavid.Valin@Sun.COM msg_wakeup_senders(qp); 12468834SDavid.Valin@Sun.COM } 12472983Sdv142724 if (lock) 12482983Sdv142724 ipc_rele(msq_svc, (kipc_perm_t *)qp); /* drops lock */ 12490Sstevel@tonic-gate 12500Sstevel@tonic-gate if (error) { 12510Sstevel@tonic-gate if (mp) 12520Sstevel@tonic-gate msg_rele(mp); 12530Sstevel@tonic-gate return (set_errno(error)); 12540Sstevel@tonic-gate } 12550Sstevel@tonic-gate 12560Sstevel@tonic-gate return (0); 12570Sstevel@tonic-gate } 12580Sstevel@tonic-gate 12594153Sdv142724 static void 12604153Sdv142724 msg_wakeup_rdr(kmsqid_t *qp, msg_select_t **flist, long type) 12614153Sdv142724 { 12624153Sdv142724 msg_select_t *walker = *flist; 12634153Sdv142724 msgq_wakeup_t *wakeup; 12646071Sdv142724 uint_t msg_hash; 12654153Sdv142724 12664153Sdv142724 msg_hash = msg_type_hash(type); 12674153Sdv142724 12684153Sdv142724 do { 12694153Sdv142724 wakeup = walker->selection(qp, msg_hash, type); 12704153Sdv142724 walker = walker->next_selection; 12714153Sdv142724 } while (!wakeup && walker != *flist); 12724153Sdv142724 12734153Sdv142724 *flist = (*flist)->next_selection; 12744153Sdv142724 if (wakeup) { 12754153Sdv142724 if (type) { 12764153Sdv142724 wakeup->msgw_snd_wake = type; 12774153Sdv142724 } 12784153Sdv142724 cv_signal(&wakeup->msgw_wake_cv); 12794153Sdv142724 } 12804153Sdv142724 } 12814153Sdv142724 12826071Sdv142724 static uint_t 12834153Sdv142724 msg_type_hash(long msg_type) 12844153Sdv142724 { 12854153Sdv142724 if (msg_type < 0) { 12866071Sdv142724 long hash = -msg_type / MSG_NEG_INTERVAL; 12874153Sdv142724 /* 12884153Sdv142724 * Negative message types are hashed over an 12894153Sdv142724 * interval. Any message type that hashes 12904153Sdv142724 * beyond MSG_MAX_QNUM is automatically placed 12914153Sdv142724 * in the last bucket. 
12924153Sdv142724 */ 12936071Sdv142724 if (hash > MSG_MAX_QNUM) 12944153Sdv142724 hash = MSG_MAX_QNUM; 12954153Sdv142724 return (hash); 12964153Sdv142724 } 12974153Sdv142724 12984153Sdv142724 /* 12994153Sdv142724 * 0 or positive message type. The first bucket is reserved for 13004153Sdv142724 * message receivers of type 0, the other buckets we hash into. 13014153Sdv142724 */ 13026071Sdv142724 if (msg_type) 13036071Sdv142724 return (1 + (msg_type % MSG_MAX_QNUM)); 13044153Sdv142724 return (0); 13054153Sdv142724 } 13064153Sdv142724 13074153Sdv142724 /* 13084153Sdv142724 * Routines to see if we have a receiver of type 0 either blocked waiting 13094153Sdv142724 * for a message. Simply return the first guy on the list. 13104153Sdv142724 */ 13114153Sdv142724 13124153Sdv142724 static msgq_wakeup_t * 13136071Sdv142724 /* ARGSUSED */ 13144153Sdv142724 msg_fnd_any_snd(kmsqid_t *qp, int msg_hash, long type) 13154153Sdv142724 { 13166071Sdv142724 msgq_wakeup_t *walker; 13176071Sdv142724 13186071Sdv142724 walker = list_head(&qp->msg_wait_snd[0]); 13196071Sdv142724 13206071Sdv142724 if (walker) 13216071Sdv142724 list_remove(&qp->msg_wait_snd[0], walker); 13226071Sdv142724 return (walker); 13234153Sdv142724 } 13244153Sdv142724 13254153Sdv142724 static msgq_wakeup_t * 13266071Sdv142724 /* ARGSUSED */ 13274153Sdv142724 msg_fnd_any_rdr(kmsqid_t *qp, int msg_hash, long type) 13284153Sdv142724 { 13296071Sdv142724 msgq_wakeup_t *walker; 13306071Sdv142724 13316071Sdv142724 walker = list_head(&qp->msg_cpy_block); 13326071Sdv142724 if (walker) 13336071Sdv142724 list_remove(&qp->msg_cpy_block, walker); 13346071Sdv142724 return (walker); 13354153Sdv142724 } 13364153Sdv142724 13374153Sdv142724 static msgq_wakeup_t * 13384153Sdv142724 msg_fnd_spc_snd(kmsqid_t *qp, int msg_hash, long type) 13394153Sdv142724 { 13404153Sdv142724 msgq_wakeup_t *walker; 13414153Sdv142724 13424153Sdv142724 walker = list_head(&qp->msg_wait_snd[msg_hash]); 13434153Sdv142724 13446071Sdv142724 while (walker && 
walker->msgw_type != type) 13456071Sdv142724 walker = list_next(&qp->msg_wait_snd[msg_hash], walker); 13466071Sdv142724 if (walker) 13476071Sdv142724 list_remove(&qp->msg_wait_snd[msg_hash], walker); 13484153Sdv142724 return (walker); 13494153Sdv142724 } 13504153Sdv142724 13516071Sdv142724 /* ARGSUSED */ 13524153Sdv142724 static msgq_wakeup_t * 13534153Sdv142724 msg_fnd_neg_snd(kmsqid_t *qp, int msg_hash, long type) 13544153Sdv142724 { 13554153Sdv142724 msgq_wakeup_t *qptr; 13564153Sdv142724 int count; 13574153Sdv142724 int check_index; 13584153Sdv142724 int neg_index; 13594153Sdv142724 int nbuckets; 13604153Sdv142724 13614153Sdv142724 if (!qp->msg_ngt_cnt) { 13624153Sdv142724 return (NULL); 13634153Sdv142724 } 13644153Sdv142724 neg_index = msg_type_hash(-type); 13654153Sdv142724 13664153Sdv142724 /* 13674153Sdv142724 * Check for a match among the negative type queues. Any buckets 13684153Sdv142724 * at neg_index or larger can match the type. Use the last send 13694153Sdv142724 * time to randomize the starting bucket to prevent starvation. 13704153Sdv142724 * Search all buckets from neg_index to MSG_MAX_QNUM, starting 13714153Sdv142724 * from the random starting point, and wrapping around after 13724153Sdv142724 * MSG_MAX_QNUM. 13734153Sdv142724 */ 13744153Sdv142724 13754153Sdv142724 nbuckets = MSG_MAX_QNUM - neg_index + 1; 13764153Sdv142724 check_index = neg_index + (qp->msg_stime % nbuckets); 13774153Sdv142724 13784153Sdv142724 for (count = nbuckets; count > 0; count--) { 13794153Sdv142724 qptr = list_head(&qp->msg_wait_snd_ngt[check_index]); 13804153Sdv142724 while (qptr) { 13814153Sdv142724 /* 13824153Sdv142724 * The lowest hash bucket may actually contain 13834153Sdv142724 * message types that are not valid for this 13844153Sdv142724 * request. This can happen due to the fact that 13854153Sdv142724 * the message buckets actually contain a consecutive 13864153Sdv142724 * range of types. 
13874153Sdv142724 */ 13884153Sdv142724 if (-qptr->msgw_type >= type) { 13896071Sdv142724 list_remove(&qp->msg_wait_snd_ngt[check_index], 13906071Sdv142724 qptr); 13914153Sdv142724 return (qptr); 13924153Sdv142724 } 13936071Sdv142724 qptr = list_next(&qp->msg_wait_snd_ngt[check_index], 13946071Sdv142724 qptr); 13954153Sdv142724 } 13964153Sdv142724 if (++check_index > MSG_MAX_QNUM) { 13974153Sdv142724 check_index = neg_index; 13984153Sdv142724 } 13994153Sdv142724 } 14004153Sdv142724 return (NULL); 14014153Sdv142724 } 14024153Sdv142724 14034153Sdv142724 static int 14044153Sdv142724 msg_rcvq_sleep(list_t *queue, msgq_wakeup_t *entry, kmutex_t **lock, 14054153Sdv142724 kmsqid_t *qp) 14064153Sdv142724 { 14074153Sdv142724 int cvres; 14084153Sdv142724 14094153Sdv142724 cv_init(&entry->msgw_wake_cv, NULL, 0, NULL); 14104153Sdv142724 14114153Sdv142724 list_insert_tail(queue, entry); 14124153Sdv142724 14134153Sdv142724 qp->msg_rcv_cnt++; 14144153Sdv142724 cvres = cv_wait_sig(&entry->msgw_wake_cv, *lock); 14154153Sdv142724 *lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, *lock); 14164153Sdv142724 qp->msg_rcv_cnt--; 14176071Sdv142724 14186071Sdv142724 if (list_link_active(&entry->msgw_list)) { 14196071Sdv142724 /* 14206071Sdv142724 * We woke up unexpectedly, remove ourself. 14216071Sdv142724 */ 14226071Sdv142724 list_remove(queue, entry); 14236071Sdv142724 } 14244153Sdv142724 14254153Sdv142724 return (cvres); 14264153Sdv142724 } 14274153Sdv142724 14284153Sdv142724 static void 14294153Sdv142724 msg_rcvq_wakeup_all(list_t *q_ptr) 14304153Sdv142724 { 14314153Sdv142724 msgq_wakeup_t *q_walk; 14324153Sdv142724 14336071Sdv142724 while (q_walk = list_head(q_ptr)) { 14346071Sdv142724 list_remove(q_ptr, q_walk); 14354153Sdv142724 cv_signal(&q_walk->msgw_wake_cv); 14364153Sdv142724 } 14374153Sdv142724 } 14384153Sdv142724 14390Sstevel@tonic-gate /* 14400Sstevel@tonic-gate * msgsys - System entry point for msgctl, msgget, msgrcv, and msgsnd 14410Sstevel@tonic-gate * system calls. 
14420Sstevel@tonic-gate */ 14430Sstevel@tonic-gate static ssize_t 14440Sstevel@tonic-gate msgsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3, 14450Sstevel@tonic-gate uintptr_t a4, uintptr_t a5) 14460Sstevel@tonic-gate { 14470Sstevel@tonic-gate ssize_t error; 14480Sstevel@tonic-gate 14490Sstevel@tonic-gate switch (opcode) { 14500Sstevel@tonic-gate case MSGGET: 14510Sstevel@tonic-gate error = msgget((key_t)a1, (int)a2); 14520Sstevel@tonic-gate break; 14530Sstevel@tonic-gate case MSGCTL: 14540Sstevel@tonic-gate error = msgctl((int)a1, (int)a2, (void *)a3); 14550Sstevel@tonic-gate break; 14560Sstevel@tonic-gate case MSGRCV: 14570Sstevel@tonic-gate error = msgrcv((int)a1, (struct ipcmsgbuf *)a2, 14580Sstevel@tonic-gate (size_t)a3, (long)a4, (int)a5); 14590Sstevel@tonic-gate break; 14600Sstevel@tonic-gate case MSGSND: 14610Sstevel@tonic-gate error = msgsnd((int)a1, (struct ipcmsgbuf *)a2, 14620Sstevel@tonic-gate (size_t)a3, (int)a4); 14630Sstevel@tonic-gate break; 14640Sstevel@tonic-gate case MSGIDS: 14650Sstevel@tonic-gate error = msgids((int *)a1, (uint_t)a2, (uint_t *)a3); 14660Sstevel@tonic-gate break; 14670Sstevel@tonic-gate case MSGSNAP: 14680Sstevel@tonic-gate error = msgsnap((int)a1, (caddr_t)a2, (size_t)a3, (long)a4); 14690Sstevel@tonic-gate break; 14700Sstevel@tonic-gate default: 14710Sstevel@tonic-gate error = set_errno(EINVAL); 14720Sstevel@tonic-gate break; 14730Sstevel@tonic-gate } 14740Sstevel@tonic-gate 14750Sstevel@tonic-gate return (error); 14760Sstevel@tonic-gate } 14770Sstevel@tonic-gate 14788834SDavid.Valin@Sun.COM /* 14798834SDavid.Valin@Sun.COM * Determine if a writer who is waiting can process its message. If so 14808834SDavid.Valin@Sun.COM * wake it up. 
14818834SDavid.Valin@Sun.COM */ 14828834SDavid.Valin@Sun.COM static void 14838834SDavid.Valin@Sun.COM msg_wakeup_senders(kmsqid_t *qp) 14848834SDavid.Valin@Sun.COM 14858834SDavid.Valin@Sun.COM { 14868834SDavid.Valin@Sun.COM struct msgq_wakeup *ptr, *optr; 14878834SDavid.Valin@Sun.COM size_t avail, smallest; 14888834SDavid.Valin@Sun.COM int msgs_out; 14898834SDavid.Valin@Sun.COM 14908834SDavid.Valin@Sun.COM /* 14918834SDavid.Valin@Sun.COM * Is there a writer waiting, and if so, can it be serviced? If 14928834SDavid.Valin@Sun.COM * not return back to the caller. 14938834SDavid.Valin@Sun.COM */ 14948834SDavid.Valin@Sun.COM if (IPC_FREE(&qp->msg_perm) || qp->msg_qnum >= qp->msg_qmax) 14958834SDavid.Valin@Sun.COM return; 14968834SDavid.Valin@Sun.COM 14978834SDavid.Valin@Sun.COM avail = qp->msg_qbytes - qp->msg_cbytes; 14988834SDavid.Valin@Sun.COM if (avail < qp->msg_snd_smallest) 14998834SDavid.Valin@Sun.COM return; 15008834SDavid.Valin@Sun.COM 15018834SDavid.Valin@Sun.COM ptr = list_head(&qp->msg_wait_rcv); 15028834SDavid.Valin@Sun.COM if (ptr == NULL) { 15038834SDavid.Valin@Sun.COM qp->msg_snd_smallest = MSG_SMALL_INIT; 15048834SDavid.Valin@Sun.COM return; 15058834SDavid.Valin@Sun.COM } 15068834SDavid.Valin@Sun.COM optr = ptr; 15078834SDavid.Valin@Sun.COM 15088834SDavid.Valin@Sun.COM /* 15098834SDavid.Valin@Sun.COM * smallest: minimum message size of all queued writers 15108834SDavid.Valin@Sun.COM * 15118834SDavid.Valin@Sun.COM * avail: amount of space left on the msgq 15128834SDavid.Valin@Sun.COM * if all the writers we have woken up are successful. 15138834SDavid.Valin@Sun.COM * 15148834SDavid.Valin@Sun.COM * msgs_out: is the number of messages on the message queue if 15158834SDavid.Valin@Sun.COM * all the writers we have woken up are successful. 
15168834SDavid.Valin@Sun.COM */ 15178834SDavid.Valin@Sun.COM 15188834SDavid.Valin@Sun.COM smallest = MSG_SMALL_INIT; 15198834SDavid.Valin@Sun.COM msgs_out = qp->msg_qnum; 15208834SDavid.Valin@Sun.COM while (ptr) { 15218834SDavid.Valin@Sun.COM ptr = list_next(&qp->msg_wait_rcv, ptr); 15228834SDavid.Valin@Sun.COM if (optr->msgw_snd_size <= avail) { 15238834SDavid.Valin@Sun.COM list_remove(&qp->msg_wait_rcv, optr); 15248834SDavid.Valin@Sun.COM avail -= optr->msgw_snd_size; 15258834SDavid.Valin@Sun.COM cv_signal(&optr->msgw_wake_cv); 15268834SDavid.Valin@Sun.COM msgs_out++; 15278834SDavid.Valin@Sun.COM if (msgs_out == qp->msg_qmax || 15288834SDavid.Valin@Sun.COM avail < qp->msg_snd_smallest) 15298834SDavid.Valin@Sun.COM break; 15308834SDavid.Valin@Sun.COM } else { 15318834SDavid.Valin@Sun.COM if (smallest > optr->msgw_snd_size) 15328834SDavid.Valin@Sun.COM smallest = optr->msgw_snd_size; 15338834SDavid.Valin@Sun.COM } 15348834SDavid.Valin@Sun.COM optr = ptr; 15358834SDavid.Valin@Sun.COM } 15368834SDavid.Valin@Sun.COM 15378834SDavid.Valin@Sun.COM /* 15388834SDavid.Valin@Sun.COM * Reset the smallest message size if the entire list has been visited 15398834SDavid.Valin@Sun.COM */ 15408834SDavid.Valin@Sun.COM if (ptr == NULL && smallest != MSG_SMALL_INIT) 15418834SDavid.Valin@Sun.COM qp->msg_snd_smallest = smallest; 15428834SDavid.Valin@Sun.COM } 15438834SDavid.Valin@Sun.COM 15440Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 15450Sstevel@tonic-gate /* 15460Sstevel@tonic-gate * msgsys32 - System entry point for msgctl, msgget, msgrcv, and msgsnd 15470Sstevel@tonic-gate * system calls for 32-bit callers on LP64 kernel. 
15480Sstevel@tonic-gate */ 15490Sstevel@tonic-gate static ssize32_t 15500Sstevel@tonic-gate msgsys32(int opcode, uint32_t a1, uint32_t a2, uint32_t a3, 15510Sstevel@tonic-gate uint32_t a4, uint32_t a5) 15520Sstevel@tonic-gate { 15530Sstevel@tonic-gate ssize_t error; 15540Sstevel@tonic-gate 15550Sstevel@tonic-gate switch (opcode) { 15560Sstevel@tonic-gate case MSGGET: 15570Sstevel@tonic-gate error = msgget((key_t)a1, (int)a2); 15580Sstevel@tonic-gate break; 15590Sstevel@tonic-gate case MSGCTL: 15600Sstevel@tonic-gate error = msgctl((int)a1, (int)a2, (void *)(uintptr_t)a3); 15610Sstevel@tonic-gate break; 15620Sstevel@tonic-gate case MSGRCV: 15630Sstevel@tonic-gate error = msgrcv((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2, 15640Sstevel@tonic-gate (size_t)a3, (long)(int32_t)a4, (int)a5); 15650Sstevel@tonic-gate break; 15660Sstevel@tonic-gate case MSGSND: 15670Sstevel@tonic-gate error = msgsnd((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2, 15680Sstevel@tonic-gate (size_t)(int32_t)a3, (int)a4); 15690Sstevel@tonic-gate break; 15700Sstevel@tonic-gate case MSGIDS: 15710Sstevel@tonic-gate error = msgids((int *)(uintptr_t)a1, (uint_t)a2, 15720Sstevel@tonic-gate (uint_t *)(uintptr_t)a3); 15730Sstevel@tonic-gate break; 15740Sstevel@tonic-gate case MSGSNAP: 15750Sstevel@tonic-gate error = msgsnap((int)a1, (caddr_t)(uintptr_t)a2, (size_t)a3, 15760Sstevel@tonic-gate (long)(int32_t)a4); 15770Sstevel@tonic-gate break; 15780Sstevel@tonic-gate default: 15790Sstevel@tonic-gate error = set_errno(EINVAL); 15800Sstevel@tonic-gate break; 15810Sstevel@tonic-gate } 15820Sstevel@tonic-gate 15830Sstevel@tonic-gate return (error); 15840Sstevel@tonic-gate } 15850Sstevel@tonic-gate #endif /* SYSCALL32_IMPL */ 1586