/*	$NetBSD: hypervisor_machdep.c,v 1.9 2008/07/01 18:49:21 bouyer Exp $	*/

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Christian Limpach.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/******************************************************************************
 * hypervisor.c
 *
 * Communication to/from hypervisor.
 *
 * Copyright (c) 2002-2004, K A Fraser
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */


#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: hypervisor_machdep.c,v 1.9 2008/07/01 18:49:21 bouyer Exp $");

#include <sys/param.h>
#include <sys/systm.h>

#include <xen/xen.h>
#include <xen/hypervisor.h>
#include <xen/evtchn.h>

#include "opt_xen.h"

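/*
 * Debug knobs (normally left disabled): defining PORT_DEBUG to an event
 * port number makes the routines below printf() whenever that port is
 * touched; defining EARLY_DEBUG_EVENT makes stipending() and
 * do_hypervisor_callback() poll the Xen debug port by hand and run
 * xen_debug_handler() directly.
 */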
// #define PORT_DEBUG 4
// #define EARLY_DEBUG_EVENT

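/*
 * stipending: scan the two-level pending-event bitmap in the shared info
 * page and record any pending event channels at their interrupt levels
 * (via hypervisor_set_ipending()) without actually running the handlers.
 * Returns nonzero if an event is pending at a level above the current
 * IPL, i.e. if the caller has interrupt work to process.
 */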
int stipending(void);
int
stipending(void)
{
	unsigned long l1;
	unsigned long l2;
	unsigned int l1i, l2i, port;
	volatile shared_info_t *s = HYPERVISOR_shared_info;
	struct cpu_info *ci;
	volatile struct vcpu_info *vci;
	int ret;

	ret = 0;
	ci = curcpu();
	vci = ci->ci_vcpu;

#if 0
	if (HYPERVISOR_shared_info->events)
		printf("stipending events %08lx mask %08lx ilevel %d\n",
		    HYPERVISOR_shared_info->events,
		    HYPERVISOR_shared_info->events_mask, ci->ci_ilevel);
#endif

#ifdef EARLY_DEBUG_EVENT
	if (xen_atomic_test_bit(&s->evtchn_pending[0], debug_port)) {
		xen_debug_handler(NULL);
		xen_atomic_clear_bit(&s->evtchn_pending[0], debug_port);
	}
#endif

	/*
	 * we're only called after STIC, so we know that we'll have to
	 * STI at the end
	 */
	while (vci->evtchn_upcall_pending) {
		cli();
		vci->evtchn_upcall_pending = 0;
		/* NB. No need for a barrier here -- XCHG is a barrier
		 * on x86. */
#ifdef XEN3
		l1 = xen_atomic_xchg(&vci->evtchn_pending_sel, 0);
#else
		l1 = xen_atomic_xchg(&s->evtchn_pending_sel, 0);
#endif
		while ((l1i = xen_ffs(l1)) != 0) {
			l1i--;
			l1 &= ~(1UL << l1i);

			l2 = s->evtchn_pending[l1i] & ~s->evtchn_mask[l1i];
			/*
			 * Mask and clear the events. This is more efficient
			 * than calling hypervisor_mask/clear_event for each
			 * event.
			 */
			xen_atomic_setbits_l(&s->evtchn_mask[l1i], l2);
			xen_atomic_clearbits_l(&s->evtchn_pending[l1i], l2);
			while ((l2i = xen_ffs(l2)) != 0) {
				l2i--;
				l2 &= ~(1UL << l2i);

				port = (l1i << LONG_SHIFT) + l2i;
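				/*
				 * Example (illustrative): with 32-bit longs
				 * (LONG_SHIFT == 5), l1i == 1 and l2i == 3
				 * select bit 3 of the second pending word,
				 * i.e. event port (1 << 5) + 3 = 35.
				 */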
				if (evtsource[port]) {
					hypervisor_set_ipending(
					    evtsource[port]->ev_imask,
					    l1i, l2i);
					evtsource[port]->ev_evcnt.ev_count++;
					if (ret == 0 && ci->ci_ilevel <
					    evtsource[port]->ev_maxlevel)
						ret = 1;
				}
#ifdef DOM0OPS
				else {
					/* set pending event */
					xenevt_setipending(l1i, l2i);
				}
#endif
			}
		}
		sti();
	}

#if 0
	if (ci->ci_ipending & 0x1)
		printf("stipending events %08lx mask %08lx ilevel %d ipending %08x\n",
		    HYPERVISOR_shared_info->events,
		    HYPERVISOR_shared_info->events_mask, ci->ci_ilevel,
		    ci->ci_ipending);
#endif

	return (ret);
}

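/*
 * do_hypervisor_callback: the event-channel upcall handler, invoked when
 * the hypervisor delivers an event to this vCPU. Unlike stipending(),
 * this dispatches each pending event directly to its registered handler
 * (or, for dom0, to the xenevt pseudo-device).
 */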
void
do_hypervisor_callback(struct intrframe *regs)
{
	unsigned long l1;
	unsigned long l2;
	unsigned int l1i, l2i, port;
	volatile shared_info_t *s = HYPERVISOR_shared_info;
	struct cpu_info *ci;
	volatile struct vcpu_info *vci;
	int level;

	ci = curcpu();
	vci = ci->ci_vcpu;
	level = ci->ci_ilevel;

	// DDD printf("do_hypervisor_callback\n");

#ifdef EARLY_DEBUG_EVENT
	if (xen_atomic_test_bit(&s->evtchn_pending[0], debug_port)) {
		xen_debug_handler(NULL);
		xen_atomic_clear_bit(&s->evtchn_pending[0], debug_port);
	}
#endif

	while (vci->evtchn_upcall_pending) {
		vci->evtchn_upcall_pending = 0;
		/* NB. No need for a barrier here -- XCHG is a barrier
		 * on x86. */
#ifdef XEN3
		l1 = xen_atomic_xchg(&vci->evtchn_pending_sel, 0);
#else
		l1 = xen_atomic_xchg(&s->evtchn_pending_sel, 0);
#endif
		while ((l1i = xen_ffs(l1)) != 0) {
			l1i--;
			l1 &= ~(1UL << l1i);

			l2 = s->evtchn_pending[l1i] & ~s->evtchn_mask[l1i];
			/*
			 * Mask and clear the pending events. Doing it here
			 * for all events that will be processed avoids a race
			 * with stipending() (which can be called through
			 * evtchn_do_event()->splx()) that could cause an
			 * event to be both processed and marked pending.
			 */
			xen_atomic_setbits_l(&s->evtchn_mask[l1i], l2);
			xen_atomic_clearbits_l(&s->evtchn_pending[l1i], l2);

			while ((l2i = xen_ffs(l2)) != 0) {
				l2i--;
				l2 &= ~(1UL << l2i);

				port = (l1i << LONG_SHIFT) + l2i;
#ifdef PORT_DEBUG
				if (port == PORT_DEBUG)
					printf("do_hypervisor_callback event %d\n", port);
#endif
				if (evtsource[port])
					call_evtchn_do_event(port, regs);
#ifdef DOM0OPS
				else {
					if (ci->ci_ilevel < IPL_HIGH) {
						/* fast path */
						int oipl = ci->ci_ilevel;
						ci->ci_ilevel = IPL_HIGH;
						call_xenevt_event(port);
						ci->ci_ilevel = oipl;
					} else {
						/* set pending event */
						xenevt_setipending(l1i, l2i);
					}
				}
#endif
			}
		}
	}

#ifdef DIAGNOSTIC
	if (level != ci->ci_ilevel)
		printf("hypervisor done %08x level %d/%d ipending %08x\n",
#ifdef XEN3
		    (uint)vci->evtchn_pending_sel,
#else
		    (uint)HYPERVISOR_shared_info->evtchn_pending_sel,
#endif
		    level, ci->ci_ilevel, ci->ci_ipending);
#endif
}

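/*
 * hypervisor_unmask_event: clear the mask bit for an event channel. If
 * the event became pending while masked, replay the lost "edge": mark
 * its selector bit and the upcall-pending flag and force a callback,
 * much like hw_resend_irq on a real IO-APIC.
 */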
void
hypervisor_unmask_event(unsigned int ev)
{
	volatile shared_info_t *s = HYPERVISOR_shared_info;
	volatile struct vcpu_info *vci = curcpu()->ci_vcpu;

#ifdef PORT_DEBUG
	if (ev == PORT_DEBUG)
		printf("hypervisor_unmask_event %d\n", ev);
#endif

	xen_atomic_clear_bit(&s->evtchn_mask[0], ev);
	/*
	 * The following is basically the equivalent of
	 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose the
	 * interrupt edge' if the channel is masked.
	 */
	if (xen_atomic_test_bit(&s->evtchn_pending[0], ev) &&
#ifdef XEN3
	    !xen_atomic_test_and_set_bit(&vci->evtchn_pending_sel, ev>>LONG_SHIFT)) {
#else
	    !xen_atomic_test_and_set_bit(&s->evtchn_pending_sel, ev>>LONG_SHIFT)) {
#endif
		xen_atomic_set_bit(&vci->evtchn_upcall_pending, 0);
		if (!vci->evtchn_upcall_mask)
			hypervisor_force_callback();
	}
}

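/*
 * hypervisor_mask_event: set the mask bit for an event channel, so the
 * hypervisor stops delivering upcalls for it.
 */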
void
hypervisor_mask_event(unsigned int ev)
{
	volatile shared_info_t *s = HYPERVISOR_shared_info;
#ifdef PORT_DEBUG
	if (ev == PORT_DEBUG)
		printf("hypervisor_mask_event %d\n", ev);
#endif

	xen_atomic_set_bit(&s->evtchn_mask[0], ev);
}

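/*
 * hypervisor_clear_event: acknowledge an event channel by clearing its
 * pending bit in the shared info page.
 */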
void
hypervisor_clear_event(unsigned int ev)
{
	volatile shared_info_t *s = HYPERVISOR_shared_info;
#ifdef PORT_DEBUG
	if (ev == PORT_DEBUG)
		printf("hypervisor_clear_event %d\n", ev);
#endif

	xen_atomic_clear_bit(&s->evtchn_pending[0], ev);
}

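/*
 * hypervisor_enable_ipl: re-enable all events that were recorded as
 * deferred at the given IPL (in ipl_evt_mask1/ipl_evt_mask2 by
 * hypervisor_set_ipending()), typically as the spl level is lowered.
 */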
void
hypervisor_enable_ipl(unsigned int ipl)
{
	u_long l1, l2;
	int l1i, l2i;
	struct cpu_info *ci = curcpu();

	/*
	 * Enable all events for this ipl. As we only set an event in
	 * ipl_evt_mask for its lowest IPL, and pending IPLs are processed
	 * high to low, we know that all callbacks for this event have been
	 * processed.
	 */

	l1 = ci->ci_isources[ipl]->ipl_evt_mask1;
	ci->ci_isources[ipl]->ipl_evt_mask1 = 0;
	while ((l1i = xen_ffs(l1)) != 0) {
		l1i--;
		l1 &= ~(1UL << l1i);
		l2 = ci->ci_isources[ipl]->ipl_evt_mask2[l1i];
		ci->ci_isources[ipl]->ipl_evt_mask2[l1i] = 0;
		while ((l2i = xen_ffs(l2)) != 0) {
			int evtch;

			l2i--;
			l2 &= ~(1UL << l2i);

			evtch = (l1i << LONG_SHIFT) + l2i;
			hypervisor_enable_event(evtch);
		}
	}
}

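/*
 * hypervisor_set_ipending: mark an event as pending at every IPL in
 * iplmask, and record its two-level bitmap coordinates (l1, l2) against
 * the lowest of those IPLs so that hypervisor_enable_ipl() can re-enable
 * the event once that level has been processed.
 */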
void
hypervisor_set_ipending(uint32_t iplmask, int l1, int l2)
{
	int ipl;
	struct cpu_info *ci = curcpu();

	/* set pending bit for the appropriate IPLs */
	ci->ci_ipending |= iplmask;

	/*
	 * And set the event pending bit for the lowest IPL. As IPLs are
	 * handled from high to low, this ensures that all callbacks will
	 * have been called when we ack the event.
	 */
	ipl = ffs(iplmask);
	KASSERT(ipl > 0);
	ipl--;
	ci->ci_isources[ipl]->ipl_evt_mask1 |= 1UL << l1;
	ci->ci_isources[ipl]->ipl_evt_mask2[l1] |= 1UL << l2;
}
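
/*
 * Illustrative sketch (not part of this file): how a driver-side
 * consumer might use the primitives above around event processing.
 * "my_port" and "my_handle_work" are hypothetical names, assuming a
 * port already bound through the usual event-channel setup.
 *
 *	hypervisor_mask_event(my_port);		// no upcalls while we work
 *	hypervisor_clear_event(my_port);	// ack the pending edge
 *	my_handle_work();			// drain the device/ring
 *	hypervisor_unmask_event(my_port);	// replays a lost edge if the
 *						// event fired meanwhile
 */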
361