/* $NetBSD: hypervisor.c,v 1.98 2024/12/06 10:53:42 bouyer Exp $ */

/*
 * Copyright (c) 2005 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: hypervisor.c,v 1.98 2024/12/06 10:53:42 bouyer Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/sysctl.h>

#include "xenbus.h"
#include "xencons.h"
#include "isa.h"
#include "isadma.h"
#include "pci.h"
#include "acpica.h"
#include "kernfs.h"

#include "opt_xen.h"
#include "opt_mpbios.h"

#include <xen/xen.h>
#include <xen/hypervisor.h>
#include <xen/evtchn.h>
#include <xen/include/public/version.h>
#include <xen/include/public/vcpu.h>
#include <x86/pio.h>
#include <x86/machdep.h>

#include <sys/cpu.h>
#include <sys/dirent.h>
#include <sys/stat.h>
#include <sys/tree.h>
#include <sys/vnode.h>
#include <miscfs/specfs/specdev.h>
#include <miscfs/kernfs/kernfs.h>
#include <xen/kernfs_machdep.h>
#include <dev/isa/isavar.h>
#include <xen/granttables.h>
#include <xen/vcpuvar.h>
#if NPCI > 0
#include <dev/pci/pcivar.h>
#if NACPICA > 0
#include <dev/acpi/acpivar.h>
#include <machine/mpconfig.h>
#include <xen/mpacpi.h>
#endif
#ifdef MPBIOS
#include <machine/mpbiosvar.h>
#endif
#endif /* NPCI */

#if NXENBUS > 0
#include <xen/xenbus.h>
#endif

#if NXENNET_HYPERVISOR > 0
#include <net/if.h>
#include <net/if_ether.h>
#include <net/if_media.h>
#include <xen/if_xennetvar.h>
#endif

#if NXBD_HYPERVISOR > 0
#include <sys/buf.h>
#include <sys/disk.h>
#include <sys/bufq.h>
#include <dev/dkvar.h>
#include <xen/xbdvar.h>
#endif

int	hypervisor_match(device_t, cfdata_t, void *);
void	hypervisor_attach(device_t, device_t, void *);

CFATTACH_DECL_NEW(hypervisor, 0,
    hypervisor_match, hypervisor_attach, NULL, NULL);

static int hypervisor_print(void *, const char *);

union hypervisor_attach_cookie {
	const char *hac_device;		/* first elem of all */
#if NXENCONS > 0
	struct xencons_attach_args hac_xencons;
#endif
#if NXENBUS > 0
	struct xenbus_attach_args hac_xenbus;
#endif
#if NXENNET_HYPERVISOR > 0
	struct xennet_attach_args hac_xennet;
#endif
#if NXBD_HYPERVISOR > 0
	struct xbd_attach_args hac_xbd;
#endif
#if NPCI > 0
	struct pcibus_attach_args hac_pba;
#if defined(DOM0OPS) && NISA > 0
	struct isabus_attach_args hac_iba;
#endif
#if NACPICA > 0
	struct acpibus_attach_args hac_acpi;
#endif
#endif /* NPCI */
	struct vcpu_attach_args hac_vcaa;
};
/*
 * This is set when the ISA bus is attached.  If it's not set by the
 * time it's checked below, then mainbus attempts to attach an ISA bus.
 */
#if defined(XENPV) && defined(DOM0OPS)
int     isa_has_been_seen;
#if NISA > 0
struct  x86_isa_chipset x86_isa_chipset;
#endif
#endif

#if defined(XENPVHVM) || defined(XENPVH)
#include <xen/include/public/arch-x86/cpuid.h>
#include <xen/include/public/arch-x86/hvm/start_info.h>
#include <xen/include/public/hvm/hvm_op.h>
#include <xen/include/public/hvm/params.h>

#include <x86/bootinfo.h>

#define	IDTVEC(name)	__CONCAT(X, name)
typedef void (vector)(void);
extern vector IDTVEC(syscall);
extern vector IDTVEC(syscall32);
extern vector IDTVEC(osyscall);
extern vector *x86_exceptions[];

extern vector IDTVEC(hypervisor_pvhvm_callback);
extern struct xenstore_domain_interface *xenstore_interface; /* XXX */

volatile shared_info_t *HYPERVISOR_shared_info __read_mostly;
paddr_t HYPERVISOR_shared_info_pa;
union start_info_union start_info_union __aligned(PAGE_SIZE);
struct hvm_start_info *hvm_start_info;

static int xen_hvm_vec = 0;
#endif

int xen_version;

/* power management, for save/restore */
static bool hypervisor_suspend(device_t, const pmf_qual_t *);
static bool hypervisor_resume(device_t, const pmf_qual_t *);

/* from FreeBSD */
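/*
 * "Unplug" protocol understood by the Xen/QEMU device model: writing an
 * unplug mask to this magic I/O port asks it to remove the emulated IDE
 * disks and/or NICs, so that only the Xen PV devices stay visible.
 * hypervisor_attach() below uses it when running as a PVHVM guest.
 */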
#define XEN_MAGIC_IOPORT 0x10
enum {
	XMI_MAGIC                        = 0x49d2,
	XMI_UNPLUG_IDE_DISKS             = 0x01,
	XMI_UNPLUG_NICS                  = 0x02,
	XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04
};


#ifdef XENPVHVM

bool xenhvm_use_percpu_callback = 0;

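/*
 * Ask Xen (via its CPUID leaves) which MSR selects the hypercall page,
 * then point that MSR at the page reserved for this purpose in locore.S.
 */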
static void
xen_init_hypercall_page(void)
{
	extern vaddr_t hypercall_page;
	u_int descs[4];

	x86_cpuid(XEN_CPUID_LEAF(2), descs);

	/*
	 * Given 32 bytes per hypercall stub and an optimistic estimate
	 * of 100 hypercalls (the current maximum is 55), there is no
	 * reason to spill over into a second hypercall page; a single
	 * page is also all that locore.S allocates. Make sure the
	 * allocation matches the registration.
	 */

	KASSERT(descs[0] == 1);

	/* XXX: vtophys(&hypercall_page) */
	wrmsr(descs[1], (uintptr_t)&hypercall_page - KERNBASE);
}

uint32_t hvm_start_paddr;

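/*
 * Early PVH/PVHVM bootstrap, called before autoconfiguration: pick up
 * the hvm_start_info passed by the hypervisor, copy the command line
 * into xen_start_info, and (for pure PVH guests) set up the hypercall
 * page, register the shared info page and switch the delay/clock
 * functions over to their Xen implementations.
 */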
void init_xen_early(void);
void
init_xen_early(void)
{
	const char *cmd_line;
	if (!vm_guest_is_pvh())
		return;

	hvm_start_info = (void *)((uintptr_t)hvm_start_paddr + KERNBASE);

	if (hvm_start_info->cmdline_paddr != 0) {
		cmd_line =
		    (void *)((uintptr_t)hvm_start_info->cmdline_paddr + KERNBASE);
		strlcpy(xen_start_info.cmd_line, cmd_line,
		    sizeof(xen_start_info.cmd_line));
	} else {
		xen_start_info.cmd_line[0] = '\0';
	}
	xen_start_info.flags = hvm_start_info->flags;

	if (vm_guest != VM_GUEST_XENPVH)
		return;

	xen_init_hypercall_page();

	HYPERVISOR_shared_info = (void *)((uintptr_t)HYPERVISOR_shared_info_pa + KERNBASE);
	struct xen_add_to_physmap xmap = {
		.domid = DOMID_SELF,
		.space = XENMAPSPACE_shared_info,
		.idx = 0, /* Important - XEN checks for this */
		.gpfn = atop(HYPERVISOR_shared_info_pa)
	};

	int err;

	if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap)) < 0) {
		printk(
		    "Xen HVM: Unable to register HYPERVISOR_shared_info %d\n", err);
	}
	delay_func = x86_delay = xen_delay;
	x86_initclock_func = xen_initclocks;
}

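/*
 * Check the kernel config (cfdata) to see whether the hypervisor device
 * is still enabled, i.e. it has not been disabled with userconf.
 */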
static bool
xen_check_hypervisordev(void)
{
	extern struct cfdata cfdata[];
	for (int i = 0; cfdata[i].cf_name != NULL; i++) {
		if (strcasecmp("hypervisor", cfdata[i].cf_name) == 0) {
			switch(cfdata[i].cf_fstate) {
			case FSTATE_NOTFOUND:
			case FSTATE_FOUND:
			case FSTATE_STAR:
				return true;
			default:
				return false;
			}
		}
	}
	return false;
}

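/*
 * Second-stage HVM/PVH initialization: verify that the hypercall page
 * works, read the Xen feature flags, fetch the xenstore page and event
 * channel from the HVM parameters, and allocate the IDT vector used for
 * event channel upcalls.  Returns 1 on success, 0 on failure.
 */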
static int
xen_hvm_init_late(void)
{
	struct idt_vec *iv = &(cpu_info_primary.ci_idtvec);

	if (HYPERVISOR_xen_version(XENVER_version, NULL) < 0) {
		aprint_error("Xen HVM: hypercall page not working\n");
		return 0;
	}
	xen_init_features();

	/* Init various preset boot time data structures */
	/* XEN xenstore shared page address, event channel */
	struct xen_hvm_param xen_hvm_param;

	xen_hvm_param.domid = DOMID_SELF;
	xen_hvm_param.index = HVM_PARAM_STORE_PFN;

	if ( HYPERVISOR_hvm_op(HVMOP_get_param, &xen_hvm_param) < 0) {
		aprint_error(
		    "Xen HVM: Unable to obtain xenstore page address\n");
		return 0;
	}

	/* Re-use PV field */
	xen_start_info.store_mfn = xen_hvm_param.value;

	pmap_kenter_pa((vaddr_t) xenstore_interface, ptoa(xen_start_info.store_mfn),
	    VM_PROT_READ|VM_PROT_WRITE, 0);

	xen_hvm_param.domid = DOMID_SELF;
	xen_hvm_param.index = HVM_PARAM_STORE_EVTCHN;

	if ( HYPERVISOR_hvm_op(HVMOP_get_param, &xen_hvm_param) < 0) {
		aprint_error(
		    "Xen HVM: Unable to obtain xenstore event channel\n");
		return 0;
	}

	xen_start_info.store_evtchn = xen_hvm_param.value;

	/*
	 * First register callback: here's why
	 * http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=7b5b8ca7dffde866d851f0b87b994e0b13e5b867
	 */

	/*
	 * Check for XENFEAT_hvm_callback_vector. Can't proceed
	 * without it.
	 */
	if (!xen_feature(XENFEAT_hvm_callback_vector)) {
		aprint_error("Xen HVM: XENFEAT_hvm_callback_vector "
		    "not available, cannot proceed\n");
		return 0;
	}

	/*
	 * Prepare the vector.
	 * We don't really care where it is, as long as it's free.
	 */
	xen_hvm_vec = idt_vec_alloc(iv, 129, 255);
	idt_vec_set(iv, xen_hvm_vec, &IDTVEC(hypervisor_pvhvm_callback));

	events_default_setup();
	return 1;
}

int
xen_hvm_init(void)
{
	/*
	 * We need to set up the HVM interfaces early, so that we can
	 * properly set up the CPUs later (especially, all CPUs need to
	 * run x86_cpuid() locally to get their vcpuid).
	 *
	 * For PVH, part of it has already been done.
	 */
	if (vm_guest == VM_GUEST_XENPVH) {
		if (xen_hvm_init_late() == 0) {
			panic("hvm_init failed");
		}
		return 1;
	}

	if (vm_guest != VM_GUEST_XENHVM)
		return 0;

	/* check if hypervisor was disabled with userconf */
	if (!xen_check_hypervisordev())
		return 0;

	aprint_normal("Identified Guest XEN in HVM mode.\n");

	xen_init_hypercall_page();

	/* HYPERVISOR_shared_info */
	struct xen_add_to_physmap xmap = {
		.domid = DOMID_SELF,
		.space = XENMAPSPACE_shared_info,
		.idx = 0, /* Important - XEN checks for this */
		.gpfn = atop(HYPERVISOR_shared_info_pa)
	};

	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap) < 0) {
		aprint_error(
		    "Xen HVM: Unable to register HYPERVISOR_shared_info\n");
		return 0;
	}

	/* The HYPERVISOR_shared_info VA/PA were allocated in pmap_bootstrap() */
	pmap_kenter_pa((vaddr_t) HYPERVISOR_shared_info,
	    HYPERVISOR_shared_info_pa, VM_PROT_READ|VM_PROT_WRITE, 0);

	if (xen_hvm_init_late() == 0)
		return 0;

	struct xen_hvm_param xen_hvm_param;
	xen_hvm_param.domid = DOMID_SELF;
	xen_hvm_param.index = HVM_PARAM_CONSOLE_PFN;

	if ( HYPERVISOR_hvm_op(HVMOP_get_param, &xen_hvm_param) < 0) {
		aprint_debug(
		    "Xen HVM: Unable to obtain xencons page address\n");
		xen_start_info.console.domU.mfn = 0;
		xen_start_info.console.domU.evtchn = -1;
		xencons_interface = 0;
	} else {
		/* Re-use PV field */
		xen_start_info.console.domU.mfn = xen_hvm_param.value;

		pmap_kenter_pa((vaddr_t) xencons_interface,
		    ptoa(xen_start_info.console.domU.mfn),
		    VM_PROT_READ|VM_PROT_WRITE, 0);

		xen_hvm_param.domid = DOMID_SELF;
		xen_hvm_param.index = HVM_PARAM_CONSOLE_EVTCHN;

		if ( HYPERVISOR_hvm_op(HVMOP_get_param, &xen_hvm_param) < 0) {
			aprint_error(
			   "Xen HVM: Unable to obtain xencons event channel\n");
			return 0;
		}

		xen_start_info.console.domU.evtchn = xen_hvm_param.value;
	}

	/*
	 * PR port-amd64/55543
	 * Workaround for Amazon's Xen 4.2: it looks like the Xen clock is
	 * not fully functional here. This version also doesn't support
	 * HVM_PARAM_CONSOLE_PFN.
	 */
	if (xencons_interface != 0) {
		delay_func = x86_delay = xen_delay;
		x86_initclock_func = xen_initclocks;
	}

	vm_guest = VM_GUEST_XENPVHVM; /* Be more specific */
	return 1;
}

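/*
 * Per-CPU HVM/PVH setup: discover this CPU's Xen vcpuid via CPUID, map
 * its vcpu_info, and register the event channel upcall, preferring a
 * per-vCPU vector and falling back to the global HVM_PARAM_CALLBACK_IRQ
 * method if that is not available.
 */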
int
xen_hvm_init_cpu(struct cpu_info *ci)
{
	u_int32_t descs[4];
	struct xen_hvm_param xen_hvm_param;
	int error;
	static bool again = 0;

	if (!vm_guest_is_xenpvh_or_pvhvm())
		return 0;

	KASSERT(ci == curcpu());

	descs[0] = 0;
	x86_cpuid(XEN_CPUID_LEAF(4), descs);
	if (descs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) {
		ci->ci_vcpuid = descs[1];
	} else {
		aprint_debug_dev(ci->ci_dev,
		    "Xen HVM: can't get VCPU id, falling back to ci_acpiid\n");
		ci->ci_vcpuid = ci->ci_acpiid;
	}

	xen_map_vcpu(ci);

	/* Register event callback handler. */

	xen_hvm_param.domid = DOMID_SELF;
	xen_hvm_param.index = HVM_PARAM_CALLBACK_IRQ;

	/* val[63:56] = 2, val[7:0] = vec */
	xen_hvm_param.value = ((int64_t)0x2 << 56) | xen_hvm_vec;

	/* First try to set up a per-cpu vector. */
	if (!again || xenhvm_use_percpu_callback) {
		struct xen_hvm_evtchn_upcall_vector xen_hvm_uvec;
		xen_hvm_uvec.vcpu = ci->ci_vcpuid;
		xen_hvm_uvec.vector = xen_hvm_vec;

		xenhvm_use_percpu_callback = 1;
		error = HYPERVISOR_hvm_op(
		    HVMOP_set_evtchn_upcall_vector, &xen_hvm_uvec);
		if (error < 0) {
			aprint_error_dev(ci->ci_dev,
			    "failed to set event upcall vector: %d\n", error);
			if (again)
				panic("event upcall vector");
			aprint_error_dev(ci->ci_dev,
			    "falling back to global vector\n");
			xenhvm_use_percpu_callback = 0;
		} else {
			/*
			 * From FreeBSD:
			 * Trick toolstack to think we are enlightened
			 */
			xen_hvm_param.value = 1;
			aprint_verbose_dev(ci->ci_dev,
			    "using event upcall vector: %d\n", xen_hvm_vec);
		}
	}

	if (again)
		return 1;

	if (HYPERVISOR_hvm_op(HVMOP_set_param, &xen_hvm_param) < 0) {
		aprint_error_dev(ci->ci_dev,
		    "Xen HVM: Unable to register event callback vector\n");
		vm_guest = VM_GUEST_XENHVM;
		return 0;
	}
	again = 1;
	return 1;
}

#endif /* XENPVHVM */

/*
 * Probe for the hypervisor; always succeeds.
 */
int
hypervisor_match(device_t parent, cfdata_t match, void *aux)
{
	struct hypervisor_attach_args *haa = aux;

	/* Attach path sanity check */
	if (strncmp(haa->haa_busname, "hypervisor", sizeof("hypervisor")) != 0)
		return 0;

#ifdef XENPVHVM
	if (!vm_guest_is_xenpvh_or_pvhvm())
		return 0;
#endif
	/* If we got here, it must mean we matched */
	return 1;
}

#if defined(MULTIPROCESSOR) && defined(XENPV)
static int
hypervisor_vcpu_print(void *aux, const char *parent)
{
	/* Unconfigured cpus are ignored quietly. */
	return (QUIET);
}
#endif /* MULTIPROCESSOR && XENPV */

/*
 * Attach the hypervisor.
 */
void
hypervisor_attach(device_t parent, device_t self, void *aux)
{

#if NPCI > 0
#ifdef PCI_BUS_FIXUP
	int pci_maxbus = 0;
#endif
#endif /* NPCI */
	union hypervisor_attach_cookie hac;
	char xen_extra_version[XEN_EXTRAVERSION_LEN];
	static char xen_version_string[20];
	int rc;
	const struct sysctlnode *node = NULL;

#ifdef XENPVHVM
	if (vm_guest == VM_GUEST_XENPVHVM) {
		/* disable emulated devices */
		if (inw(XEN_MAGIC_IOPORT) == XMI_MAGIC) {
			outw(XEN_MAGIC_IOPORT,
			    XMI_UNPLUG_IDE_DISKS | XMI_UNPLUG_NICS);
		} else {
			aprint_error_dev(self,
			    "Unable to disable emulated devices\n");
		}
	}
#endif /* XENPVHVM */
	xenkernfs_init();

	xen_version = HYPERVISOR_xen_version(XENVER_version, NULL);
	memset(xen_extra_version, 0, sizeof(xen_extra_version));
	HYPERVISOR_xen_version(XENVER_extraversion, xen_extra_version);
	rc = snprintf(xen_version_string, 20, "%d.%d%s", XEN_MAJOR(xen_version),
		XEN_MINOR(xen_version), xen_extra_version);
	aprint_normal(": Xen version %s\n", xen_version_string);
	if (rc >= 20)
		aprint_debug(": xen_version_string truncated\n");

	sysctl_createv(NULL, 0, NULL, &node, 0,
	    CTLTYPE_NODE, "xen",
	    SYSCTL_DESCR("Xen top level node"),
	    NULL, 0, NULL, 0, CTL_MACHDEP, CTL_CREATE, CTL_EOL);

	if (node != NULL) {
		sysctl_createv(NULL, 0, &node, NULL, CTLFLAG_READONLY,
		    CTLTYPE_STRING, "version",
		    SYSCTL_DESCR("Xen hypervisor version"),
		    NULL, 0, xen_version_string, 0, CTL_CREATE, CTL_EOL);
	}

	aprint_verbose_dev(self, "features: ");
#define XEN_TST_F(n) \
	if (xen_feature(XENFEAT_##n)) \
		aprint_verbose(" %s", #n);

	XEN_TST_F(writable_page_tables);
	XEN_TST_F(writable_descriptor_tables);
	XEN_TST_F(auto_translated_physmap);
	XEN_TST_F(supervisor_mode_kernel);
	XEN_TST_F(pae_pgdir_above_4gb);
	XEN_TST_F(mmu_pt_update_preserve_ad);
	XEN_TST_F(highmem_assist);
	XEN_TST_F(gnttab_map_avail_bits);
	XEN_TST_F(hvm_callback_vector);
	XEN_TST_F(hvm_safe_pvclock);
	XEN_TST_F(hvm_pirqs);
#undef XEN_TST_F
	aprint_verbose("\n");

	xengnt_init();
	events_init();

#ifdef XENPV
	memset(&hac, 0, sizeof(hac));
	hac.hac_vcaa.vcaa_name = "vcpu";
	hac.hac_vcaa.vcaa_caa.cpu_number = 0;
	hac.hac_vcaa.vcaa_caa.cpu_role = CPU_ROLE_BP;
	hac.hac_vcaa.vcaa_caa.cpu_func = NULL; /* See xen/x86/cpu.c:vcpu_attach() */
	config_found(self, &hac.hac_vcaa, hypervisor_print,
	    CFARGS(.iattr = "xendevbus"));

#ifdef MULTIPROCESSOR

	/*
	 * The xenstore contains the configured number of vcpus.
	 * The xenstore, however, is not accessible until much later in
	 * the boot sequence. We therefore brute-force check for
	 * allocated vcpus (see cpu.c:vcpu_match()) by iterating up to
	 * the maximum number of CPUs supported by NetBSD MP.
	 */
	cpuid_t vcpuid;

	for (vcpuid = 1; vcpuid < maxcpus; vcpuid++) {
		memset(&hac, 0, sizeof(hac));
		hac.hac_vcaa.vcaa_name = "vcpu";
		hac.hac_vcaa.vcaa_caa.cpu_number = vcpuid;
		hac.hac_vcaa.vcaa_caa.cpu_role = CPU_ROLE_AP;
		hac.hac_vcaa.vcaa_caa.cpu_func = NULL; /* See xen/x86/cpu.c:vcpu_attach() */
		if (NULL == config_found(self, &hac.hac_vcaa,
					 hypervisor_vcpu_print,
					 CFARGS(.iattr = "xendevbus"))) {
			break;
		}
	}

#endif /* MULTIPROCESSOR */
#endif /* XENPV */

#if NXENBUS > 0
	extern struct x86_bus_dma_tag xenbus_bus_dma_tag;
	memset(&hac, 0, sizeof(hac));
	hac.hac_xenbus.xa_device = "xenbus";
	hac.hac_xenbus.xa_dmat = &xenbus_bus_dma_tag;
	config_found(self, &hac.hac_xenbus, hypervisor_print,
	    CFARGS(.iattr = "xendevbus"));
#endif
#if NXENCONS > 0
	if (xencons_interface != 0 || vm_guest != VM_GUEST_XENPVHVM) {
		memset(&hac, 0, sizeof(hac));
		hac.hac_xencons.xa_device = "xencons";
		config_found(self, &hac.hac_xencons, hypervisor_print,
		    CFARGS(.iattr = "xendevbus"));
	}
#endif

#if defined(DOM0OPS)
#if defined(XENPV)
#if NISADMA > 0 && NACPICA > 0
	/*
	 * ACPI needs ISA DMA initialized before it starts probing.
	 */
	isa_dmainit(&x86_isa_chipset, x86_bus_space_io, &isa_bus_dma_tag,
	    self);
#endif

#if NPCI > 0
#if NACPICA > 0
	if (acpi_present) {
		memset(&hac, 0, sizeof(hac));
		hac.hac_acpi.aa_iot = x86_bus_space_io;
		hac.hac_acpi.aa_memt = x86_bus_space_mem;
		hac.hac_acpi.aa_pc = NULL;
		hac.hac_acpi.aa_pciflags =
			PCI_FLAGS_IO_OKAY | PCI_FLAGS_MEM_OKAY |
			PCI_FLAGS_MRL_OKAY | PCI_FLAGS_MRM_OKAY |
			PCI_FLAGS_MWI_OKAY;
		hac.hac_acpi.aa_ic = &x86_isa_chipset;
		hac.hac_acpi.aa_dmat = &pci_bus_dma_tag;
#ifdef _LP64
		hac.hac_acpi.aa_dmat64 = &pci_bus_dma64_tag;
#else
		hac.hac_acpi.aa_dmat64 = NULL;
#endif /* _LP64 */
		config_found(self, &hac.hac_acpi, NULL,
		    CFARGS(.iattr = "acpibus"));
	}
#endif /* NACPICA */
	memset(&hac, 0, sizeof(hac));
	hac.hac_pba.pba_iot = x86_bus_space_io;
	hac.hac_pba.pba_memt = x86_bus_space_mem;
	hac.hac_pba.pba_dmat = &pci_bus_dma_tag;
#ifdef _LP64
	hac.hac_pba.pba_dmat64 = &pci_bus_dma64_tag;
#else
	hac.hac_pba.pba_dmat64 = NULL;
#endif /* _LP64 */
	hac.hac_pba.pba_flags = PCI_FLAGS_MEM_OKAY | PCI_FLAGS_IO_OKAY;
	hac.hac_pba.pba_bridgetag = NULL;
	hac.hac_pba.pba_bus = 0;
#if NACPICA > 0 && defined(ACPI_SCANPCI)
	if (mpacpi_active)
		mp_pci_scan(self, &hac.hac_pba, pcibusprint);
	else
#endif
#if defined(MPBIOS) && defined(MPBIOS_SCANPCI)
	if (mpbios_scanned != 0)
		mp_pci_scan(self, &hac.hac_pba, pcibusprint);
	else
#endif
	config_found(self, &hac.hac_pba, pcibusprint,
	    CFARGS(.iattr = "pcibus"));
#if NACPICA > 0
	if (mp_verbose)
		acpi_pci_link_state();
#endif
#if NISA > 0
	if (isa_has_been_seen == 0) {
		memset(&hac, 0, sizeof(hac));
		hac.hac_iba._iba_busname = "isa";
		hac.hac_iba.iba_iot = x86_bus_space_io;
		hac.hac_iba.iba_memt = x86_bus_space_mem;
		hac.hac_iba.iba_dmat = &isa_bus_dma_tag;
		hac.hac_iba.iba_ic = NULL; /* No isa DMA yet */
		config_found(self, &hac.hac_iba, isabusprint,
		    CFARGS(.iattr = "isabus"));
	}
#endif /* NISA */
#endif /* NPCI */
#endif /* XENPV */

	if (xendomain_is_privileged()) {
		xenprivcmd_init();
	}
#endif /* DOM0OPS */

	hypervisor_machdep_attach();

	if (!pmf_device_register(self, hypervisor_suspend, hypervisor_resume))
		aprint_error_dev(self, "couldn't establish power handler\n");
}

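/*
 * PMF hooks used for save/restore: for PV guests, quiesce the event
 * channels and grant tables on suspend, and re-establish them (plus the
 * machine-dependent shared info state) on resume.
 */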
static bool
hypervisor_suspend(device_t dev, const pmf_qual_t *qual)
{
#ifdef XENPV
	events_suspend();
	xengnt_suspend();
#endif
	return true;
}

static bool
hypervisor_resume(device_t dev, const pmf_qual_t *qual)
{
#ifdef XENPV
	hypervisor_machdep_resume();

	xengnt_resume();
	events_resume();
#endif
	return true;
}

static int
hypervisor_print(void *aux, const char *parent)
{
	union hypervisor_attach_cookie *hac = aux;

	if (parent)
		aprint_normal("%s at %s", hac->hac_device, parent);
	return (UNCONF);
}

#define DIR_MODE	(S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)

kernfs_parentdir_t *kernxen_pkt;

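/*
 * Create the /kern/xen kernfs directory; other Xen components later
 * hang their entries off the exported kernxen_pkt parent.
 */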
void
xenkernfs_init(void)
{
#if NKERNFS > 0
	kernfs_entry_t *dkt;

	KERNFS_ALLOCENTRY(dkt, KM_SLEEP);
	KERNFS_INITENTRY(dkt, DT_DIR, "xen", NULL, KFSsubdir, VDIR, DIR_MODE);
	kernfs_addentry(NULL, dkt);
	kernxen_pkt = KERNFS_ENTOPARENTDIR(dkt);
#endif
}

/*
 * Set up Xen's vcpu_info for this CPU. Requires ci_vcpuid to be
 * initialized.
 */
void
xen_map_vcpu(struct cpu_info *ci)
{
	int size;
	uintptr_t ptr;
	struct vcpu_register_vcpu_info vcpu_info_op;
	paddr_t ma;
	int ret;

	if (ci->ci_vcpuid < XEN_LEGACY_MAX_VCPUS) {
		ci->ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[ci->ci_vcpuid];
		return;
	}

	/*
	 * We need to map it via VCPUOP_register_vcpu_info, which requires
	 * that vcpu_info not cross a page boundary; aligning to the
	 * smallest power-of-2 size which can contain vcpu_info ensures
	 * this. Also make sure it's cache-line aligned, for performance.
	 */
	size = CACHE_LINE_SIZE;
	while (size < sizeof(struct vcpu_info)) {
		size = size << 1;
	}
	ptr = (uintptr_t)uvm_km_alloc(kernel_map,
		    sizeof(struct vcpu_info) + size - 1, 0,
		    UVM_KMF_WIRED|UVM_KMF_ZERO);
	ptr = roundup2(ptr, size);
	ci->ci_vcpu = (struct vcpu_info *)ptr;

	pmap_extract_ma(pmap_kernel(), (ptr & ~PAGE_MASK), &ma);
	vcpu_info_op.mfn = ma >> PAGE_SHIFT;
	vcpu_info_op.offset = (ptr & PAGE_MASK);
	vcpu_info_op.rsvd = 0;

	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info,
	    ci->ci_vcpuid, &vcpu_info_op);
	if (ret) {
		panic("VCPUOP_register_vcpu_info for %d failed: %d",
		    ci->ci_vcpuid, ret);
	}
}