/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/x_call.h>
#include <sys/xc_levels.h>
#include <sys/cpu.h>
#include <sys/psw.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/mutex_impl.h>
#include <sys/stack.h>
#include <sys/promif.h>
#include <sys/x86_archext.h>

/*
 * Implementation for cross-processor calls via interprocessor interrupts
 *
 * This implementation uses a message passing architecture to allow multiple
 * concurrent cross calls to be in flight at any given time. We use the cmpxchg
 * instruction, aka casptr(), to implement simple efficient work queues for
 * message passing between CPUs with almost no need for regular locking.
 * See xc_extract() and xc_insert() below.
 *
 * The general idea is that initiating a cross call means putting a message
 * on each target CPU's work queue. Any synchronization is handled by passing
 * the message back and forth between initiator and target(s).
 *
 * Every CPU has xc_work_cnt, which indicates that it has messages to process.
 * This value is incremented as message traffic is initiated and decremented
 * with every message that finishes all processing.
 *
 * The code needs no mfence or other membar_*() calls. The uses of
 * casptr(), cas32() and atomic_dec_32() for the message passing are
 * implemented with LOCK prefix instructions which are equivalent to mfence.
 *
 * One interesting aspect of this implementation is that it allows 2 or more
 * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
 * The cross call processing by the CPUs will happen in any order with only
 * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
 * from cross calls before all slaves have invoked the function.
 *
 * The reason for this asynchronous approach is to allow for fast global
 * TLB shootdowns. If all N CPUs tried to do a global TLB invalidation on a
 * different virtual address at the same time, the old code required N squared
 * IPIs. With this method, depending on timing, it can happen with just N IPIs.
 */
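
/*
 * A minimal usage sketch (illustrative only, not code from this file):
 * a caller that wants every other CPU to run a handler, and wants to wait
 * until all of them have run it, might do
 *
 *	cpuset_t set;
 *
 *	CPUSET_ALL_BUT(set, CPU->cpu_id);
 *	xc_call((xc_arg_t)my_arg, 0, 0, CPUSET2BV(set), my_handler);
 *
 * where my_handler is a hypothetical xc_func_t and my_arg a hypothetical
 * argument. Each selected CPU runs the handler from xc_serv() at XC_HI_PIL,
 * and xc_call() returns only after every target has invoked it.
 */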

/*
 * The default is to not enable collecting counts of IPI information, since
 * the updating of shared cachelines could cause excess bus traffic.
 */
uint_t xc_collect_enable = 0;
uint64_t xc_total_cnt = 0;	/* total #IPIs sent for cross calls */
uint64_t xc_multi_cnt = 0;	/* # times we piggy backed on another IPI */

/*
 * Values for message states. Here are the normal transitions. A transition
 * of "->" happens in the slave cpu and "=>" happens in the master cpu as
 * the messages are passed back and forth.
 *
 * FREE => ASYNC ->                       DONE => FREE
 * FREE => CALL ->                        DONE => FREE
 * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
 *
 * The interesting one above is ASYNC. You might ask, why not go directly
 * to FREE, instead of DONE? If it did that, it might be possible to exhaust
 * the master's xc_free list if a master can generate ASYNC messages faster
 * than the slave can process them. That could be handled with more
 * complicated accounting, but since nothing important uses ASYNC, I've not
 * bothered.
 */
#define	XC_MSG_FREE	(0)	/* msg in xc_free queue */
#define	XC_MSG_ASYNC	(1)	/* msg in slave xc_msgbox */
#define	XC_MSG_CALL	(2)	/* msg in slave xc_msgbox */
#define	XC_MSG_SYNC	(3)	/* msg in slave xc_msgbox */
#define	XC_MSG_WAITING	(4)	/* msg in master xc_msgbox or xc_waiters */
#define	XC_MSG_RELEASED	(5)	/* msg in slave xc_msgbox */
#define	XC_MSG_DONE	(6)	/* msg in master xc_msgbox */

/*
 * We allow for one high priority message at a time to happen in the system.
 * This is used for panic, kmdb, etc., so no locking is done.
 */
static cpuset_t xc_priority_set;
static xc_data_t xc_priority_data;

/*
 * Decrement a CPU's work count
 */
static void
xc_decrement(struct machcpu *mcpu)
{
	atomic_dec_32(&mcpu->xc_work_cnt);
}

/*
 * Increment a CPU's work count and return the old value
 */
static int
xc_increment(struct machcpu *mcpu)
{
	int old;

	do {
		old = mcpu->xc_work_cnt;
	} while (cas32((uint32_t *)&mcpu->xc_work_cnt, old, old + 1) != old);
	return (old);
}

/*
 * Put a message into a queue. The insertion is atomic no matter
 * how many different inserts/extracts to the same queue happen.
 */
static void
xc_insert(void *queue, xc_msg_t *msg)
{
	xc_msg_t *old_head;

	do {
		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
		msg->xc_next = old_head;
	} while (casptr(queue, old_head, msg) != old_head);
}

/*
 * Extract a message from a queue. The extraction is atomic only
 * when just one thread does extractions from the queue.
 * If the queue is empty, NULL is returned.
 */
static xc_msg_t *
xc_extract(xc_msg_t **queue)
{
	xc_msg_t *old_head;

	do {
		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
		if (old_head == NULL)
			return (old_head);
	} while (casptr(queue, old_head, old_head->xc_next) != old_head);
	old_head->xc_next = NULL;
	return (old_head);
}

/*
 * Initialize the machcpu fields used for cross calls
 */
static uint_t xc_initialized = 0;

void
xc_init_cpu(struct cpu *cpup)
{
	xc_msg_t *msg;
	int c;

	/*
	 * Add a new msg to each existing CPU's free list, as well as one
	 * to my free list for each of them.
	 */
	for (c = 0; c < ncpus; ++c) {
		if (cpu[c] == NULL)
			continue;
		msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
		msg->xc_command = XC_MSG_FREE;
		xc_insert(&cpu[c]->cpu_m.xc_free, msg);

		msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
		msg->xc_command = XC_MSG_FREE;
		xc_insert(&cpup->cpu_m.xc_free, msg);
	}

	/*
	 * Add one for self messages
	 */
	msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
	msg->xc_command = XC_MSG_FREE;
	xc_insert(&cpup->cpu_m.xc_free, msg);

	if (!xc_initialized)
		xc_initialized = 1;
}

/*
 * X-call message processing routine. Note that this is used by both
 * senders and recipients of messages.
 *
 * We're protected against changing CPUs by either being in a high-priority
 * interrupt, having preemption disabled or by having a raised SPL.
 */
/*ARGSUSED*/
uint_t
xc_serv(caddr_t arg1, caddr_t arg2)
{
	struct machcpu *mcpup = &(CPU->cpu_m);
	xc_msg_t *msg;
	xc_data_t *data;
	xc_msg_t *xc_waiters = NULL;
	uint32_t num_waiting = 0;
	xc_func_t func;
	xc_arg_t a1;
	xc_arg_t a2;
	xc_arg_t a3;
	uint_t rc = DDI_INTR_UNCLAIMED;

	while (mcpup->xc_work_cnt != 0) {
		rc = DDI_INTR_CLAIMED;

		/*
		 * We may have to wait for a message to arrive.
		 */
		for (;;) {
			/*
			 * always check for and handle a priority message
			 */
			if (BT_TEST(CPUSET2BV(xc_priority_set), CPU->cpu_id)) {
				func = xc_priority_data.xc_func;
				a1 = xc_priority_data.xc_a1;
				a2 = xc_priority_data.xc_a2;
				a3 = xc_priority_data.xc_a3;
				BT_CLEAR(CPUSET2BV(xc_priority_set),
				    CPU->cpu_id);
				xc_decrement(mcpup);
				func(a1, a2, a3);
				if (mcpup->xc_work_cnt == 0)
					return (rc);
			}

			/*
			 * extract and handle a regular message
			 */
			msg = xc_extract(&mcpup->xc_msgbox);
			if (msg != NULL)
				break;

			/*
			 * wait for a message to arrive
			 */
			if (x86_feature & X86_MWAIT) {
				i86_monitor(
				    (volatile uint32_t *)&mcpup->xc_msgbox,
				    0, 0);
				if (mcpup->xc_msgbox == NULL)
					i86_mwait(0, 0);
			} else {
				SMT_PAUSE();
			}
		}

		/*
		 * process the message
		 */
		switch (msg->xc_command) {

		/*
		 * ASYNC gives back the message immediately, then we do the
		 * function and return with no more waiting.
		 */
		case XC_MSG_ASYNC:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			func = data->xc_func;
			a1 = data->xc_a1;
			a2 = data->xc_a2;
			a3 = data->xc_a3;
			msg->xc_command = XC_MSG_DONE;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			if (func != NULL)
				(void) (*func)(a1, a2, a3);
			xc_decrement(mcpup);
			break;

		/*
		 * SYNC messages do the call, then send it back to the master
		 * in WAITING mode.
		 */
		case XC_MSG_SYNC:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			if (data->xc_func != NULL)
				(void) (*data->xc_func)(data->xc_a1,
				    data->xc_a2, data->xc_a3);
			msg->xc_command = XC_MSG_WAITING;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			break;

		/*
		 * WAITING messages are collected by the master until all
		 * have arrived. Once all arrive, we release them back to
		 * the slaves.
		 */
		case XC_MSG_WAITING:
			xc_insert(&xc_waiters, msg);
			if (++num_waiting < mcpup->xc_wait_cnt)
				break;
			while ((msg = xc_extract(&xc_waiters)) != NULL) {
				msg->xc_command = XC_MSG_RELEASED;
				xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox,
				    msg);
				--num_waiting;
			}
			if (num_waiting != 0)
				panic("wrong number waiting");
			mcpup->xc_wait_cnt = 0;
			break;

		/*
		 * CALL messages do the function and then, like RELEASED,
		 * send the message back to the master as DONE.
		 */
		case XC_MSG_CALL:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			if (data->xc_func != NULL)
				(void) (*data->xc_func)(data->xc_a1,
				    data->xc_a2, data->xc_a3);
			/*FALLTHROUGH*/
		case XC_MSG_RELEASED:
			msg->xc_command = XC_MSG_DONE;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			xc_decrement(mcpup);
			break;

		/*
		 * DONE means a slave has completely finished up.
		 * Once we collect all the DONE messages, we'll exit
		 * processing too.
		 */
		case XC_MSG_DONE:
			msg->xc_command = XC_MSG_FREE;
			xc_insert(&mcpup->xc_free, msg);
			xc_decrement(mcpup);
			break;

		case XC_MSG_FREE:
			panic("free message in msgbox");
			break;

		default:
			panic("bad message in msgbox");
			break;
		}
	}
	return (rc);
}

/*
 * Initiate cross call processing.
 */
static void
xc_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	uint_t command)
{
	int c;
	struct cpu *cpup;
	xc_msg_t *msg;
	xc_data_t *data;
	int cnt;
	int save_spl;

	if (!xc_initialized) {
		if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) &&
		    func != NULL)
			(void) (*func)(arg1, arg2, arg3);
		return;
	}

	save_spl = splr(ipltospl(XC_HI_PIL));

	/*
	 * fill in cross call data
	 */
	data = &CPU->cpu_m.xc_data;
	data->xc_func = func;
	data->xc_a1 = arg1;
	data->xc_a2 = arg2;
	data->xc_a3 = arg3;

	/*
	 * Post messages to all CPUs involved that are CPU_READY
	 */
	CPU->cpu_m.xc_wait_cnt = 0;
	for (c = 0; c < ncpus; ++c) {
		if (!BT_TEST(set, c))
			continue;
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
			continue;

		/*
		 * Fill out a new message.
		 */
		msg = xc_extract(&CPU->cpu_m.xc_free);
		if (msg == NULL)
			panic("Ran out of free xc_msg_t's");
		msg->xc_command = command;
		msg->xc_master = CPU->cpu_id;
		msg->xc_slave = c;

		/*
		 * Increment my work count for all messages that I'll
		 * transition from DONE to FREE.
		 * Also remember how many XC_MSG_WAITINGs to look for.
		 */
		(void) xc_increment(&CPU->cpu_m);
		if (command == XC_MSG_SYNC)
			++CPU->cpu_m.xc_wait_cnt;

		/*
		 * Increment the target CPU work count then insert the message
		 * in the target msgbox. If I post the first bit of work
		 * for the target to do, send an IPI to the target CPU.
		 */
		cnt = xc_increment(&cpup->cpu_m);
		xc_insert(&cpup->cpu_m.xc_msgbox, msg);
		if (cpup != CPU) {
			if (cnt == 0) {
				CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
				send_dirint(c, XC_HI_PIL);
				if (xc_collect_enable)
					++xc_total_cnt;
			} else if (xc_collect_enable) {
				++xc_multi_cnt;
			}
		}
	}

	/*
	 * Now drop into the message handler until all work is done.
	 */
	(void) xc_serv(NULL, NULL);
	splx(save_spl);
}

/*
 * Push out a priority cross call.
 */
static void
xc_priority_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set)
{
	int i;
	int c;
	struct cpu *cpup;

	/*
	 * Wait briefly for a previous xc_priority to have finished, but
	 * continue no matter what.
	 */
	for (i = 0; i < 40000; ++i) {
		if (CPUSET_ISNULL(xc_priority_set))
			break;
		SMT_PAUSE();
	}

	/*
	 * fill in cross call data
	 */
	xc_priority_data.xc_func = func;
	xc_priority_data.xc_a1 = arg1;
	xc_priority_data.xc_a2 = arg2;
	xc_priority_data.xc_a3 = arg3;
	xc_priority_set = *(cpuset_t *)set;

	/*
	 * Post messages to all CPUs involved that are CPU_READY.
	 * We'll always IPI, plus bang on the xc_msgbox for i86_mwait().
	 */
	for (c = 0; c < ncpus; ++c) {
		if (!BT_TEST(set, c))
			continue;
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
		    cpup == CPU)
			continue;
		(void) xc_increment(&cpup->cpu_m);
		send_dirint(c, XC_HI_PIL);
		for (i = 0; i < 10; ++i) {
			(void) casptr(&cpup->cpu_m.xc_msgbox,
			    cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
		}
	}
}

/*
 * Do a cross call to the specified CPUs with absolutely no waiting or
 * handshaking. This should only be used for extraordinary operations,
 * like panic(), which need to work, in some fashion, in a not completely
 * functional system. All other uses that want minimal waiting should use
 * xc_call_nowait().
 */
void
xc_priority(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	extern int IGNORE_KERNEL_PREEMPTION;
	int save_spl = splr(ipltospl(XC_HI_PIL));
	int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;

	IGNORE_KERNEL_PREEMPTION = 1;
	xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set);
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
	splx(save_spl);
}
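
/*
 * A minimal usage sketch (illustrative only, not code from this file):
 * panic-like code might capture the other CPUs with
 *
 *	cpuset_t set;
 *
 *	CPUSET_ALL_BUT(set, CPU->cpu_id);
 *	xc_priority(0, 0, 0, CPUSET2BV(set), stop_func);
 *
 * where stop_func is a hypothetical xc_func_t that parks the CPU it runs on.
 * kdi_xc_others() below uses the same pattern via xc_priority_common().
 */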

/*
 * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger.
 */
void
kdi_xc_others(int this_cpu, void (*func)(void))
{
	extern int IGNORE_KERNEL_PREEMPTION;
	int save_kernel_preemption;
	cpuset_t set;

	if (!xc_initialized)
		return;

	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
	IGNORE_KERNEL_PREEMPTION = 1;
	CPUSET_ALL_BUT(set, this_cpu);
	xc_priority_common((xc_func_t)func, 0, 0, 0, CPUSET2BV(set));
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}

/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call_nowait() may return immediately too.
 */
void
xc_call_nowait(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_ASYNC);
}

/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call() returns only after remotes have finished.
 */
void
xc_call(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_CALL);
}

/*
 * Invoke function on specified processors. Remotes wait until all have
 * finished. xc_sync() also waits until all remotes have finished.
 */
void
xc_sync(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_SYNC);
}