1 /*	$NetBSD: cpu.c,v 1.139 2020/07/14 00:45:53 yamaguchi Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000 The NetBSD Foundation, Inc.
5  * Copyright (c) 2002, 2006, 2007 YAMAMOTO Takashi,
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by RedBack Networks Inc.
10  *
11  * Author: Bill Sommerfeld
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * Copyright (c) 1999 Stefan Grefen
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. All advertising materials mentioning features or use of this software
47  *    must display the following acknowledgement:
48  *      This product includes software developed by the NetBSD
49  *      Foundation, Inc. and its contributors.
50  * 4. Neither the name of The NetBSD Foundation nor the names of its
51  *    contributors may be used to endorse or promote products derived
52  *    from this software without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
55  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  */
66 
67 #include <sys/cdefs.h>
68 __KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.139 2020/07/14 00:45:53 yamaguchi Exp $");
69 
70 #include "opt_ddb.h"
71 #include "opt_multiprocessor.h"
72 #include "opt_mpbios.h"		/* for MPDEBUG */
73 #include "opt_mtrr.h"
74 #include "opt_xen.h"
75 
76 #include "lapic.h"
77 #include "ioapic.h"
78 
79 #include <sys/param.h>
80 #include <sys/proc.h>
81 #include <sys/systm.h>
82 #include <sys/device.h>
83 #include <sys/kmem.h>
84 #include <sys/cpu.h>
85 #include <sys/cpufreq.h>
86 #include <sys/atomic.h>
87 #include <sys/reboot.h>
88 #include <sys/idle.h>
89 
90 #include <uvm/uvm.h>
91 
92 #include <machine/cpu.h>
93 #include <machine/cpufunc.h>
94 #include <machine/cpuvar.h>
95 #include <machine/pmap.h>
96 #include <machine/vmparam.h>
97 #include <machine/mpbiosvar.h>
98 #include <machine/pcb.h>
99 #include <machine/specialreg.h>
100 #include <machine/segments.h>
101 #include <machine/gdt.h>
102 #include <machine/mtrr.h>
103 #include <machine/pio.h>
104 
105 #include <x86/fpu.h>
106 
107 #include <xen/xen.h>
108 #include <xen/include/public/vcpu.h>
109 #include <xen/vcpuvar.h>
110 
111 #if NLAPIC > 0
112 #include <machine/apicvar.h>
113 #include <machine/i82489reg.h>
114 #include <machine/i82489var.h>
115 #endif
116 
117 #include <dev/ic/mc146818reg.h>
118 #include <dev/isa/isareg.h>
119 
120 static int	cpu_match(device_t, cfdata_t, void *);
121 static void	cpu_attach(device_t, device_t, void *);
122 static void	cpu_defer(device_t);
123 static int	cpu_rescan(device_t, const char *, const int *);
124 static void	cpu_childdetached(device_t, device_t);
125 static int	vcpu_match(device_t, cfdata_t, void *);
126 static void	vcpu_attach(device_t, device_t, void *);
127 static void	cpu_attach_common(device_t, device_t, void *);
128 void		cpu_offline_md(void);
129 
130 struct cpu_softc {
131 	device_t sc_dev;		/* device tree glue */
132 	struct cpu_info *sc_info;	/* pointer to CPU info */
133 	bool sc_wasonline;
134 };
135 
136 int mp_cpu_start(struct cpu_info *, vaddr_t);
137 void mp_cpu_start_cleanup(struct cpu_info *);
138 const struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL,
139 				      mp_cpu_start_cleanup };
140 
141 CFATTACH_DECL2_NEW(cpu, sizeof(struct cpu_softc),
142     cpu_match, cpu_attach, NULL, NULL, cpu_rescan, cpu_childdetached);
143 
144 CFATTACH_DECL_NEW(vcpu, sizeof(struct cpu_softc),
145     vcpu_match, vcpu_attach, NULL, NULL);
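/*
 * Note: there are two attachment points here.  "cpu" represents a
 * physical CPU as enumerated by the platform and is tracked on
 * phycpu_info_list, while "vcpu" is a Xen virtual CPU that the kernel
 * actually runs on, tracked on cpu_info_list via cpu_attach_common().
 */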
146 
147 /*
148  * Statically-allocated CPU info for the primary CPU (or the only
149  * CPU, on uniprocessors).  The CPU info list is initialized to
150  * point at it.
151  */
152 struct cpu_info cpu_info_primary __aligned(CACHE_LINE_SIZE) = {
153 	.ci_dev = 0,
154 	.ci_self = &cpu_info_primary,
155 	.ci_idepth = -1,
156 	.ci_curlwp = &lwp0,
157 	.ci_curldt = -1,
158 };
159 struct cpu_info phycpu_info_primary __aligned(CACHE_LINE_SIZE) = {
160 	.ci_dev = 0,
161 	.ci_self = &phycpu_info_primary,
162 };
163 
164 struct cpu_info *cpu_info_list = &cpu_info_primary;
165 struct cpu_info *phycpu_info_list = &phycpu_info_primary;
166 
167 uint32_t cpu_feature[7] __read_mostly; /* X86 CPUID feature bits
168 			  *	[0] basic features %edx
169 			  *	[1] basic features %ecx
170 			  *	[2] extended features %edx
171 			  *	[3] extended features %ecx
172 			  *	[4] VIA padlock features
173 			  *	[5] structured extended features cpuid.7:%ebx
174 			  *	[6] structured extended features cpuid.7:%ecx
175 			  */
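/*
 * For instance, the FXSR bit reported in basic %edx lives in
 * cpu_feature[0], so callers test it as (cpu_feature[0] & CPUID_FXSR),
 * as cpu_init() below does.
 */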
176 
177 bool x86_mp_online;
178 paddr_t mp_trampoline_paddr = MP_TRAMPOLINE;
179 
180 #if defined(MULTIPROCESSOR)
181 void    	cpu_hatch(void *);
182 static void    	cpu_boot_secondary(struct cpu_info *ci);
183 static void    	cpu_start_secondary(struct cpu_info *ci);
184 #endif	/* MULTIPROCESSOR */
185 
186 static int
187 cpu_match(device_t parent, cfdata_t match, void *aux)
188 {
189 
190 	return 1;
191 }
192 
193 static void
194 cpu_attach(device_t parent, device_t self, void *aux)
195 {
196 	struct cpu_softc *sc = device_private(self);
197 	struct cpu_attach_args *caa = aux;
198 	struct cpu_info *ci;
199 	uintptr_t ptr;
200 	static int nphycpu = 0;
201 
202 	sc->sc_dev = self;
203 
204 	/*
205 	 * If we're the first attached physical CPU, use the statically
206 	 * allocated primary cpu_info; otherwise allocate a fresh one and
207 	 * link it onto phycpu_info_list.
208 	 */
209 	aprint_naive("\n");
210 	aprint_normal("\n");
211 	if (nphycpu > 0) {
212 		struct cpu_info *tmp;
213 		ptr = (uintptr_t)kmem_zalloc(sizeof(*ci) + CACHE_LINE_SIZE - 1,
214 		    KM_SLEEP);
215 		ci = (struct cpu_info *)roundup2(ptr, CACHE_LINE_SIZE);
216 		ci->ci_curldt = -1;
217 
218 		tmp = phycpu_info_list;
219 		while (tmp->ci_next)
220 			tmp = tmp->ci_next;
221 
222 		tmp->ci_next = ci;
223 	} else {
224 		ci = &phycpu_info_primary;
225 	}
226 
227 	ci->ci_self = ci;
228 	sc->sc_info = ci;
229 
230 	ci->ci_dev = self;
231 	ci->ci_acpiid = caa->cpu_id;
232 	ci->ci_cpuid = caa->cpu_number;
233 	ci->ci_vcpu = NULL;
234 	ci->ci_index = nphycpu++;
235 	ci->ci_kfpu_spl = -1;
236 
237 	if (!pmf_device_register(self, NULL, NULL))
238 		aprint_error_dev(self, "couldn't establish power handler\n");
239 
240 	(void)config_defer(self, cpu_defer);
241 }
242 
243 static void
244 cpu_defer(device_t self)
245 {
246 	cpu_rescan(self, NULL, NULL);
247 }
248 
249 static int
250 cpu_rescan(device_t self, const char *ifattr, const int *locators)
251 {
252 	struct cpu_softc *sc = device_private(self);
253 	struct cpufeature_attach_args cfaa;
254 	struct cpu_info *ci = sc->sc_info;
255 
256 	memset(&cfaa, 0, sizeof(cfaa));
257 	cfaa.ci = ci;
258 
259 	if (ifattr_match(ifattr, "cpufeaturebus")) {
260 
261 		if (ci->ci_frequency == NULL) {
262 			cfaa.name = "frequency";
263 			ci->ci_frequency = config_found_ia(self,
264 			    "cpufeaturebus", &cfaa, NULL);
265 		}
266 	}
267 
268 	return 0;
269 }
270 
271 static void
272 cpu_childdetached(device_t self, device_t child)
273 {
274 	struct cpu_softc *sc = device_private(self);
275 	struct cpu_info *ci = sc->sc_info;
276 
277 	if (ci->ci_frequency == child)
278 		ci->ci_frequency = NULL;
279 }
280 
281 static int
282 vcpu_match(device_t parent, cfdata_t match, void *aux)
283 {
284 	struct vcpu_attach_args *vcaa = aux;
285 	struct vcpu_runstate_info vcr;
286 	int error;
287 
288 	if (strcmp(vcaa->vcaa_name, match->cf_name) == 0) {
289 		error = HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info,
290 		    vcaa->vcaa_caa.cpu_number, &vcr);
291 		switch (error) {
292 		case 0:
293 			return 1;
294 		case -ENOENT:
295 			return 0;
296 		default:
297 			panic("Unknown hypervisor error %d returned on vcpu runstate probe\n", error);
298 		}
299 	}
300 
301 	return 0;
302 }
303 
304 static void
305 vcpu_attach(device_t parent, device_t self, void *aux)
306 {
307 	struct vcpu_attach_args *vcaa = aux;
308 
309 	KASSERT(vcaa->vcaa_caa.cpu_func == NULL);
310 	vcaa->vcaa_caa.cpu_func = &mp_cpu_funcs;
311 	cpu_attach_common(parent, self, &vcaa->vcaa_caa);
312 
313 	if (!pmf_device_register(self, NULL, NULL))
314 		aprint_error_dev(self, "couldn't establish power handler\n");
315 }
316 
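/*
 * VCPUOP_is_up returns nonzero when the hypervisor considers the vcpu
 * online, so the hypercall result is used directly as a boolean below.
 */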
317 static int
318 vcpu_is_up(struct cpu_info *ci)
319 {
320 	KASSERT(ci != NULL);
321 	return HYPERVISOR_vcpu_op(VCPUOP_is_up, ci->ci_vcpuid, NULL);
322 }
323 
324 static void
325 cpu_vm_init(struct cpu_info *ci)
326 {
327 	int ncolors = 2, i;
328 
329 	for (i = CAI_ICACHE; i <= CAI_L2CACHE; i++) {
330 		struct x86_cache_info *cai;
331 		int tcolors;
332 
333 		cai = &ci->ci_cinfo[i];
334 
335 		tcolors = atop(cai->cai_totalsize);
336 		switch (cai->cai_associativity) {
337 		case 0xff:
338 			tcolors = 1; /* fully associative */
339 			break;
340 		case 0:
341 		case 1:
342 			break;
343 		default:
344 			tcolors /= cai->cai_associativity;
345 		}
346 		ncolors = uimax(ncolors, tcolors);
347 	}
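	/*
	 * Illustrative arithmetic (example numbers, not from this system):
	 * a 512 KiB, 8-way cache with 4 KiB pages gives atop(512 KiB) = 128
	 * page-sized slots, and 128 / 8 = 16 usable page colors.
	 */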
348 
349 	/*
350 	 * Knowing the size of the largest cache on this CPU, potentially
351 	 * re-color our pages.
352 	 */
353 	aprint_debug_dev(ci->ci_dev, "%d page colors\n", ncolors);
354 	uvm_page_recolor(ncolors);
355 	pmap_tlb_cpu_init(ci);
356 #ifndef __HAVE_DIRECT_MAP
357 	pmap_vpage_cpu_init(ci);
358 #endif
359 }
360 
361 static void
362 cpu_attach_common(device_t parent, device_t self, void *aux)
363 {
364 	struct cpu_softc *sc = device_private(self);
365 	struct cpu_attach_args *caa = aux;
366 	struct cpu_info *ci;
367 	uintptr_t ptr;
368 	int cpunum = caa->cpu_number;
369 	static bool again = false;
370 
371 	sc->sc_dev = self;
372 
373 	/*
374 	 * If we're an Application Processor, allocate a cpu_info
375 	 * structure, otherwise use the primary's.
376 	 */
377 	if (caa->cpu_role == CPU_ROLE_AP) {
378 		aprint_naive(": Application Processor\n");
379 		ptr = (uintptr_t)kmem_alloc(sizeof(*ci) + CACHE_LINE_SIZE - 1,
380 		    KM_SLEEP);
381 		ci = (struct cpu_info *)roundup2(ptr, CACHE_LINE_SIZE);
382 		memset(ci, 0, sizeof(*ci));
383 		cpu_init_tss(ci);
384 	} else {
385 		aprint_naive(": %s Processor\n",
386 		    caa->cpu_role == CPU_ROLE_SP ? "Single" : "Boot");
387 		ci = &cpu_info_primary;
388 	}
389 
390 	ci->ci_self = ci;
391 	sc->sc_info = ci;
392 	ci->ci_dev = self;
393 	ci->ci_cpuid = cpunum;
394 	ci->ci_vcpuid = cpunum;
395 	ci->ci_kfpu_spl = -1;
396 
397 	KASSERT(HYPERVISOR_shared_info != NULL);
398 	KASSERT(cpunum < XEN_LEGACY_MAX_VCPUS);
399 	ci->ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[cpunum];
400 
401 	KASSERT(ci->ci_func == 0);
402 	ci->ci_func = caa->cpu_func;
403 	aprint_normal("\n");
404 
405 	/* Must be called before mi_cpu_attach(). */
406 	cpu_vm_init(ci);
407 
408 	if (caa->cpu_role == CPU_ROLE_AP) {
409 		int error;
410 
411 		error = mi_cpu_attach(ci);
412 		if (error != 0) {
413 			aprint_error_dev(self,
414 			    "mi_cpu_attach failed with %d\n", error);
415 			return;
416 		}
417 
418 		KASSERT(ci->ci_data.cpu_idlelwp != NULL);
419 
420 	} else {
421 		KASSERT(ci->ci_data.cpu_idlelwp != NULL);
422 	}
423 
424 	KASSERT(ci->ci_cpuid == ci->ci_index);
425 #ifdef __x86_64__
426 	/* No user PGD mapped for this CPU yet */
427 	ci->ci_xen_current_user_pgd = 0;
428 #endif
429 	mutex_init(&ci->ci_kpm_mtx, MUTEX_DEFAULT, IPL_VM);
430 	pmap_reference(pmap_kernel());
431 	ci->ci_pmap = pmap_kernel();
432 	ci->ci_tlbstate = TLBSTATE_STALE;
433 
434 	/*
435 	 * The boot processor may not be attached first, but the one-time
436 	 * initialization below must be done before other processors can boot.
437 	 */
438 	if (!again) {
439 		atomic_or_32(&ci->ci_flags, CPUF_PRESENT | CPUF_PRIMARY);
440 		/* Basic init. */
441 		cpu_intr_init(ci);
442 		cpu_get_tsc_freq(ci);
443 		cpu_init(ci);
444 		pmap_cpu_init_late(ci);
445 
446 		/* Every processor needs to init its own ipi h/w (similar to lapic) */
447 		xen_ipi_init();
448 
449 		/* Make sure DELAY() is initialized. */
450 		DELAY(1);
451 		again = true;
452 	}
453 
454 	/* further PCB init done later. */
455 
456 	switch (caa->cpu_role) {
457 	case CPU_ROLE_SP:
458 		atomic_or_32(&ci->ci_flags, CPUF_SP);
459 		cpu_identify(ci);
460 		x86_cpu_idle_init();
461 		break;
462 
463 	case CPU_ROLE_BP:
464 		atomic_or_32(&ci->ci_flags, CPUF_BSP);
465 		cpu_identify(ci);
466 		x86_cpu_idle_init();
467 		break;
468 
469 	case CPU_ROLE_AP:
470 		atomic_or_32(&ci->ci_flags, CPUF_AP);
471 
472 		/*
473 		 * Report on and start an Application Processor.
474 		 */
475 
476 #if defined(MULTIPROCESSOR)
477 		/* interrupt handler stack */
478 		cpu_intr_init(ci);
479 
480 		/* Setup per-cpu memory for idt */
481 		idt_vec_init_cpu_md(&ci->ci_idtvec, cpu_index(ci));
482 
483 		/* Setup per-cpu memory for gdt */
484 		gdt_alloc_cpu(ci);
485 
486 		pmap_cpu_init_late(ci);
487 		cpu_start_secondary(ci);
488 
489 		if (ci->ci_flags & CPUF_PRESENT) {
490 			struct cpu_info *tmp;
491 
492 			cpu_identify(ci);
493 			tmp = cpu_info_list;
494 			while (tmp->ci_next)
495 				tmp = tmp->ci_next;
496 
497 			tmp->ci_next = ci;
498 		}
499 #else
500 		aprint_error_dev(ci->ci_dev, "not started\n");
501 #endif
502 		break;
503 
504 	default:
505 		panic("unknown processor type??\n");
506 	}
507 
508 #ifdef MPVERBOSE
509 	if (mp_verbose) {
510 		struct lwp *l = ci->ci_data.cpu_idlelwp;
511 		struct pcb *pcb = lwp_getpcb(l);
512 
513 		aprint_verbose_dev(self,
514 		    "idle lwp at %p, idle sp at %p\n",
515 		    l,
516 #ifdef i386
517 		    (void *)pcb->pcb_esp
518 #else
519 		    (void *)pcb->pcb_rsp
520 #endif
521 		);
522 
523 	}
524 #endif /* MPVERBOSE */
525 }
526 
527 /*
528  * Initialize the processor appropriately.
529  */
530 
531 void
532 cpu_init(struct cpu_info *ci)
533 {
534 	uint32_t cr4 = 0;
535 
536 	/*
537 	 * If we have FXSAVE/FXRSTOR, use them.
538 	 */
539 	if (cpu_feature[0] & CPUID_FXSR) {
540 		cr4 |= CR4_OSFXSR;
541 
542 		/*
543 		 * If we have SSE/SSE2, enable XMM exceptions.
544 		 */
545 		if (cpu_feature[0] & (CPUID_SSE|CPUID_SSE2))
546 			cr4 |= CR4_OSXMMEXCPT;
547 	}
548 
549 	/* If xsave is supported, enable it */
550 	if (cpu_feature[1] & CPUID2_XSAVE && x86_fpu_save >= FPU_SAVE_XSAVE)
551 		cr4 |= CR4_OSXSAVE;
552 
553 	if (cr4) {
554 		cr4 |= rcr4();
555 		lcr4(cr4);
556 	}
557 
558 	if (x86_fpu_save >= FPU_SAVE_FXSAVE) {
559 		fpuinit_mxcsr_mask();
560 	}
561 
562 	/*
563 	 * Changing the CR4 register may change CPUID values. For example,
564 	 * setting CR4_OSXSAVE sets CPUID2_OSXSAVE, which lives in
565 	 * ci_feat_val[1], so refresh that word here.
566 	 * XXX Words other than ci_feat_val[1] may have changed too.
567 	 */
568 	if (cpuid_level >= 1) {
569 		u_int descs[4];
570 
571 		x86_cpuid(1, descs);
572 		ci->ci_feat_val[1] = descs[2];
573 	}
574 
575 	/* If xsave is enabled, enable all fpu features */
576 	if (cr4 & CR4_OSXSAVE) {
577 		wrxcr(0, x86_xsave_features & XCR0_FPU);
578 	}
579 
580 	atomic_or_32(&ci->ci_flags, CPUF_RUNNING);
581 }
582 
583 
584 #ifdef MULTIPROCESSOR
585 
586 void
587 cpu_boot_secondary_processors(void)
588 {
589 	struct cpu_info *ci;
590 	kcpuset_t *cpus;
591 	u_long i;
592 
593 	kcpuset_create(&cpus, true);
594 	kcpuset_set(cpus, cpu_index(curcpu()));
595 	for (i = 0; i < maxcpus; i++) {
596 		ci = cpu_lookup(i);
597 		if (ci == NULL)
598 			continue;
599 		if (ci->ci_data.cpu_idlelwp == NULL)
600 			continue;
601 		if ((ci->ci_flags & CPUF_PRESENT) == 0)
602 			continue;
603 		if (ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY))
604 			continue;
605 		cpu_boot_secondary(ci);
606 		kcpuset_set(cpus, cpu_index(ci));
607 	}
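	/*
	 * Spin until every CPU started above has been marked running in
	 * kcpuset_running, then declare the MP system online.
	 */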
608 	while (!kcpuset_match(cpus, kcpuset_running))
609 		;
610 	kcpuset_destroy(cpus);
611 
612 	x86_mp_online = true;
613 }
614 
615 static void
616 cpu_init_idle_lwp(struct cpu_info *ci)
617 {
618 	struct lwp *l = ci->ci_data.cpu_idlelwp;
619 	struct pcb *pcb = lwp_getpcb(l);
620 
621 	pcb->pcb_cr0 = rcr0();
622 }
623 
624 void
625 cpu_init_idle_lwps(void)
626 {
627 	struct cpu_info *ci;
628 	u_long i;
629 
630 	for (i = 0; i < maxcpus; i++) {
631 		ci = cpu_lookup(i);
632 		if (ci == NULL)
633 			continue;
634 		if (ci->ci_data.cpu_idlelwp == NULL)
635 			continue;
636 		if ((ci->ci_flags & CPUF_PRESENT) == 0)
637 			continue;
638 		cpu_init_idle_lwp(ci);
639 	}
640 }
641 
642 static void
643 cpu_start_secondary(struct cpu_info *ci)
644 {
645 	int i;
646 
647 	aprint_debug_dev(ci->ci_dev, "starting\n");
648 
649 	ci->ci_curlwp = ci->ci_data.cpu_idlelwp;
650 
651 	if (CPU_STARTUP(ci, (vaddr_t) cpu_hatch) != 0) {
652 		return;
653 	}
654 
655 	/*
656 	 * Wait up to ~1 second for the AP to set CPUF_PRESENT in cpu_hatch().
657 	 */
658 	for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT)) && i > 0; i--) {
659 		delay(10);
660 	}
661 	if ((ci->ci_flags & CPUF_PRESENT) == 0) {
662 		aprint_error_dev(ci->ci_dev, "failed to become ready\n");
663 #if defined(MPDEBUG) && defined(DDB)
664 		printf("dropping into debugger; continue from here to resume boot\n");
665 		Debugger();
666 #endif
667 	}
668 
669 	CPU_START_CLEANUP(ci);
670 }
671 
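/*
 * Release an AP that is spinning in cpu_hatch() waiting for CPUF_GO,
 * then wait up to ~1 second for it to set CPUF_RUNNING, which happens
 * at the end of cpu_init().
 */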
672 void
673 cpu_boot_secondary(struct cpu_info *ci)
674 {
675 	int i;
676 	atomic_or_32(&ci->ci_flags, CPUF_GO);
677 	for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) {
678 		delay(10);
679 	}
680 	if ((ci->ci_flags & CPUF_RUNNING) == 0) {
681 		aprint_error_dev(ci->ci_dev, "CPU failed to start\n");
682 #if defined(MPDEBUG) && defined(DDB)
683 		printf("dropping into debugger; continue from here to resume boot\n");
684 		Debugger();
685 #endif
686 	}
687 }
688 
689 /*
690  * APs end up here immediately after initialisation and VCPUOP_up in
691  * mp_cpu_start().
692  * At this point, we are running in the idle pcb/idle stack of the new
693  * CPU.  This function jumps to the idle loop and starts looking for
694  * work.
695  */
696 extern void x86_64_tls_switch(struct lwp *);
697 void
698 cpu_hatch(void *v)
699 {
700 	struct cpu_info *ci = (struct cpu_info *)v;
701 	struct pcb *pcb;
702 	int s, i;
703 
704 	/* Setup TLS and kernel GS/FS */
705 	cpu_init_msrs(ci, true);
706 	cpu_init_idt(ci);
707 	gdt_init_cpu(ci);
708 
709 	cpu_probe(ci);
710 
711 	atomic_or_32(&ci->ci_flags, CPUF_PRESENT);
712 
713 	while ((ci->ci_flags & CPUF_GO) == 0) {
714 		/* Don't use delay, boot CPU may be patching the text. */
715 		for (i = 10000; i != 0; i--)
716 			x86_pause();
717 	}
718 
719 	/* Because the text may have been patched in x86_patch(). */
720 	x86_flush();
721 	tlbflushg();
722 
723 	KASSERT((ci->ci_flags & CPUF_RUNNING) == 0);
724 
725 	KASSERT(ci->ci_curlwp == ci->ci_data.cpu_idlelwp);
726 	KASSERT(curlwp == ci->ci_data.cpu_idlelwp);
727 	pcb = lwp_getpcb(curlwp);
728 	pcb->pcb_cr3 = pmap_pdirpa(pmap_kernel(), 0);
729 
730 	xen_ipi_init();
731 
732 	xen_initclocks();
733 
734 #ifdef __x86_64__
735 	fpuinit(ci);
736 #endif
737 
738 	lldt(GSEL(GLDT_SEL, SEL_KPL));
739 
740 	cpu_init(ci);
741 	cpu_get_tsc_freq(ci);
742 
743 	s = splhigh();
744 	x86_enable_intr();
745 	splx(s);
746 
747 	aprint_debug_dev(ci->ci_dev, "running\n");
748 
749 	KASSERT(ci->ci_curlwp == ci->ci_data.cpu_idlelwp);
750 	idle_loop(NULL);
751 	KASSERT(false);
752 }
753 
754 #if defined(DDB)
755 
756 #include <ddb/db_output.h>
757 #include <machine/db_machdep.h>
758 
759 /*
760  * Dump CPU information from ddb.
761  */
762 void
763 cpu_debug_dump(void)
764 {
765 	struct cpu_info *ci;
766 	CPU_INFO_ITERATOR cii;
767 
768 	db_printf("addr		dev	id	flags	ipis	curlwp\n");
769 	for (CPU_INFO_FOREACH(cii, ci)) {
770 		db_printf("%p	%s	%ld	%x	%x	%10p\n",
771 		    ci,
772 		    ci->ci_dev == NULL ? "BOOT" : device_xname(ci->ci_dev),
773 		    (long)ci->ci_vcpuid,
774 		    ci->ci_flags, ci->ci_ipis,
775 		    ci->ci_curlwp);
776 	}
777 }
778 #endif /* DDB */
779 
780 #endif /* MULTIPROCESSOR */
781 
782 extern void hypervisor_callback(void);
783 extern void failsafe_callback(void);
784 #ifdef __x86_64__
785 typedef void (vector)(void);
786 extern vector Xsyscall, Xsyscall32;
787 #endif
788 
789 /*
790  * Set up the "trampoline". On Xen we set up nearly all of the cpu context
791  * outside a trampoline, so we prototype and call the target entry point
792  * like so: void targetip(struct cpu_info *);
793  */
794 
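/*
 * Convert the kernel VAs backing a GDT into the machine frame numbers
 * the hypervisor expects, write-protecting each page along the way
 * since Xen requires GDT frames to be mapped read-only.
 */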
795 static void
796 gdt_prepframes(paddr_t *frames, vaddr_t base, uint32_t entries)
797 {
798 	int i;
799 	for (i = 0; i < entries; i++) {
800 		frames[i] = ((paddr_t)xpmap_ptetomach(
801 		    (pt_entry_t *)(base + (i << PAGE_SHIFT)))) >> PAGE_SHIFT;
802 
803 		/* Mark Read-only */
804 		pmap_pte_clearbits(kvtopte(base + (i << PAGE_SHIFT)),
805 		    PTE_W);
806 	}
807 }
808 
809 #ifdef __x86_64__
810 extern char *ldtstore;
811 
812 static void
813 xen_init_amd64_vcpuctxt(struct cpu_info *ci, struct vcpu_guest_context *initctx,
814     void targetrip(struct cpu_info *))
815 {
816 	/* page frames to point at GDT */
817 	extern int gdt_size;
818 	paddr_t frames[16];
819 	psize_t gdt_ents;
820 
821 	struct lwp *l;
822 	struct pcb *pcb;
823 
824 	volatile struct vcpu_info *vci;
825 
826 	KASSERT(ci != NULL);
827 	KASSERT(ci != &cpu_info_primary);
828 	KASSERT(initctx != NULL);
829 	KASSERT(targetrip != NULL);
830 
831 	memset(initctx, 0, sizeof(*initctx));
832 
833 	gdt_ents = roundup(gdt_size, PAGE_SIZE) >> PAGE_SHIFT;
834 	KASSERT(gdt_ents <= 16);
835 
836 	gdt_prepframes(frames, (vaddr_t)ci->ci_gdt, gdt_ents);
837 
838 	/* Initialise the vcpu context: We use idle_loop()'s pcb context. */
839 
840 	l = ci->ci_data.cpu_idlelwp;
841 
842 	KASSERT(l != NULL);
843 	pcb = lwp_getpcb(l);
844 	KASSERT(pcb != NULL);
845 
846 	/* resume with interrupts off */
847 	vci = ci->ci_vcpu;
848 	vci->evtchn_upcall_mask = 1;
849 	xen_mb();
850 
851 	/* resume in kernel-mode */
852 	initctx->flags = VGCF_in_kernel | VGCF_online;
853 
854 	/* Stack and entry points:
855 	 * We arrange for the stack frame for cpu_hatch() to
856 	 * appear as a callee frame of lwp_trampoline(). Being a
857 	 * leaf frame prevents trampling on any of the MD stack setup
858 	 * that x86/vm_machdep.c:cpu_lwp_fork() does for idle_loop()
859 	 */
860 
861 	initctx->user_regs.rdi = (uint64_t) ci; /* targetrip(ci); */
862 	initctx->user_regs.rip = (vaddr_t) targetrip;
863 
864 	initctx->user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
865 
866 	initctx->user_regs.rflags = pcb->pcb_flags;
867 	initctx->user_regs.rsp = pcb->pcb_rsp;
868 
869 	/* Data segments */
870 	initctx->user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
871 	initctx->user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
872 	initctx->user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
873 
874 	/* GDT */
875 	memcpy(initctx->gdt_frames, frames, sizeof(frames));
876 	initctx->gdt_ents = gdt_ents;
877 
878 	/* LDT */
879 	initctx->ldt_base = (unsigned long)ldtstore;
880 	initctx->ldt_ents = LDT_SIZE >> 3;
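	/* LDT_SIZE is in bytes and descriptors are 8 bytes, hence the >> 3. */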
881 
882 	/* Kernel context state */
883 	initctx->kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
884 	initctx->kernel_sp = pcb->pcb_rsp0;
885 	initctx->ctrlreg[0] = pcb->pcb_cr0;
886 	initctx->ctrlreg[1] = 0; /* "resuming" from kernel - no User cr3. */
887 	initctx->ctrlreg[2] = (vaddr_t)targetrip;
888 	/*
889 	 * Use pmap_kernel() L4 PD directly, until we setup the
890 	 * per-cpu L4 PD in pmap_cpu_init_late()
891 	 */
892 	initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(ci->ci_kpm_pdirpa)));
893 	initctx->ctrlreg[4] = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT;
894 
895 	/* Xen callbacks */
896 	initctx->event_callback_eip = (unsigned long)hypervisor_callback;
897 	initctx->failsafe_callback_eip = (unsigned long)failsafe_callback;
898 	initctx->syscall_callback_eip = (unsigned long)Xsyscall;
899 
900 	return;
901 }
902 #else /* i386 */
903 extern union descriptor *ldtstore;
904 extern void Xsyscall(void);
905 
906 static void
907 xen_init_i386_vcpuctxt(struct cpu_info *ci, struct vcpu_guest_context *initctx,
908     void targeteip(struct cpu_info *))
909 {
910 	/* page frames to point at GDT */
911 	extern int gdt_size;
912 	paddr_t frames[16];
913 	psize_t gdt_ents;
914 
915 	struct lwp *l;
916 	struct pcb *pcb;
917 
918 	volatile struct vcpu_info *vci;
919 
920 	KASSERT(ci != NULL);
921 	KASSERT(ci != &cpu_info_primary);
922 	KASSERT(initctx != NULL);
923 	KASSERT(targeteip != NULL);
924 
925 	memset(initctx, 0, sizeof(*initctx));
926 
927 	gdt_ents = roundup(gdt_size, PAGE_SIZE) >> PAGE_SHIFT;
928 	KASSERT(gdt_ents <= 16);
929 
930 	gdt_prepframes(frames, (vaddr_t)ci->ci_gdt, gdt_ents);
931 
932 	/*
933 	 * Initialise the vcpu context:
934 	 * We use this cpu's idle_loop() pcb context.
935 	 */
936 
937 	l = ci->ci_data.cpu_idlelwp;
938 
939 	KASSERT(l != NULL);
940 	pcb = lwp_getpcb(l);
941 	KASSERT(pcb != NULL);
942 
943 	/* resume with interrupts off */
944 	vci = ci->ci_vcpu;
945 	vci->evtchn_upcall_mask = 1;
946 	xen_mb();
947 
948 	/* resume in kernel-mode */
949 	initctx->flags = VGCF_in_kernel | VGCF_online;
950 
951 	/* Stack frame setup for cpu_hatch():
952 	 * We arrange for the stack frame for cpu_hatch() to
953 	 * appear as a callee frame of lwp_trampoline(). Being a
954 	 * leaf frame prevents trampling on any of the MD stack setup
955 	 * that x86/vm_machdep.c:cpu_lwp_fork() does for idle_loop()
956 	 */
957 
958 	initctx->user_regs.esp = pcb->pcb_esp - 4; /* Leave word for
959 						      arg1 */
960 	{
961 		/* targeteip(ci); */
962 		uint32_t *arg = (uint32_t *)initctx->user_regs.esp;
963 		arg[1] = (uint32_t)ci; /* arg1 */
964 	}
965 
966 	initctx->user_regs.eip = (vaddr_t)targeteip;
967 	initctx->user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
968 	initctx->user_regs.eflags |= pcb->pcb_iopl;
969 
970 	/* Data segments */
971 	initctx->user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
972 	initctx->user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
973 	initctx->user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
974 	initctx->user_regs.fs = GSEL(GDATA_SEL, SEL_KPL);
975 
976 	/* GDT */
977 	memcpy(initctx->gdt_frames, frames, sizeof(frames));
978 	initctx->gdt_ents = gdt_ents;
979 
980 	/* LDT */
981 	initctx->ldt_base = (unsigned long)ldtstore;
982 	initctx->ldt_ents = NLDT;
983 
984 	/* Kernel context state */
985 	initctx->kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
986 	initctx->kernel_sp = pcb->pcb_esp0;
987 	initctx->ctrlreg[0] = pcb->pcb_cr0;
988 	initctx->ctrlreg[1] = 0; /* "resuming" from kernel - no User cr3. */
989 	initctx->ctrlreg[2] = (vaddr_t)targeteip;
990 	initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(ci->ci_pae_l3_pdirpa)));
991 	initctx->ctrlreg[4] = /* CR4_PAE | */CR4_OSFXSR | CR4_OSXMMEXCPT;
992 
993 	/* Xen callbacks */
994 	initctx->event_callback_eip = (unsigned long)hypervisor_callback;
995 	initctx->event_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
996 	initctx->failsafe_callback_eip = (unsigned long)failsafe_callback;
997 	initctx->failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
998 
999 	return;
1000 }
1001 #endif /* __x86_64__ */
1002 
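/*
 * Build a vcpu context that will enter at the given target (cpu_hatch(),
 * via CPU_STARTUP() in cpu_start_secondary()), then ask the hypervisor
 * to initialise the vcpu and bring it up.
 */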
1003 int
1004 mp_cpu_start(struct cpu_info *ci, vaddr_t target)
1005 {
1006 	int hyperror;
1007 	struct vcpu_guest_context *vcpuctx;
1008 
1009 	KASSERT(ci != NULL);
1010 	KASSERT(ci != &cpu_info_primary);
1011 	KASSERT(ci->ci_flags & CPUF_AP);
1012 
1013 	vcpuctx = kmem_alloc(sizeof(*vcpuctx), KM_SLEEP);
1014 
1015 #ifdef __x86_64__
1016 	xen_init_amd64_vcpuctxt(ci, vcpuctx, (void (*)(struct cpu_info *))target);
1017 #else
1018 	xen_init_i386_vcpuctxt(ci, vcpuctx, (void (*)(struct cpu_info *))target);
1019 #endif
1020 
1021 	/* Initialise the given vcpu to execute cpu_hatch(ci); */
1022 	if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_initialise, ci->ci_vcpuid, vcpuctx))) {
1023 		aprint_error(": context initialisation failed. errno = %d\n", hyperror);
1024 		goto out;
1025 	}
1026 
1027 	/* Start it up */
1028 
1029 	/* First bring it down */
1030 	if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_down, ci->ci_vcpuid, NULL))) {
1031 		aprint_error(": VCPUOP_down hypervisor command failed. errno = %d\n", hyperror);
1032 		goto out;
1033 	}
1034 
1035 	if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_up, ci->ci_vcpuid, NULL))) {
1036 		aprint_error(": VCPUOP_up hypervisor command failed. errno = %d\n", hyperror);
1037 		goto out;
1038 	}
1039 
1040 	if (!vcpu_is_up(ci)) {
1041 		aprint_error(": did not come up\n");
1042 		hyperror = -1;
1043 		goto out;
1044 	}
1045 
1046 out:
1047 	kmem_free(vcpuctx, sizeof(*vcpuctx));
1048 	return hyperror;
1049 }
1050 
1051 void
1052 mp_cpu_start_cleanup(struct cpu_info *ci)
1053 {
1054 	if (vcpu_is_up(ci)) {
1055 		aprint_debug_dev(ci->ci_dev, "is started.\n");
1056 	} else {
1057 		aprint_error_dev(ci->ci_dev, "did not start up.\n");
1058 	}
1059 }
1060 
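/*
 * Under Xen PV the FS/GS segment bases are set through the hypervisor
 * rather than by writing the base MSRs directly; only the NX enable bit
 * in EFER goes through the usual MSR path.
 */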
1061 void
1062 cpu_init_msrs(struct cpu_info *ci, bool full)
1063 {
1064 #ifdef __x86_64__
1065 	if (full) {
1066 		HYPERVISOR_set_segment_base(SEGBASE_FS, 0);
1067 		HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, (uint64_t)ci);
1068 		HYPERVISOR_set_segment_base(SEGBASE_GS_USER, 0);
1069 	}
1070 #endif
1071 
1072 	if (cpu_feature[2] & CPUID_NOX)
1073 		wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);
1074 }
1075 
1076 void
1077 cpu_offline_md(void)
1078 {
1079 	return;
1080 }
1081 
1082 void
1083 cpu_get_tsc_freq(struct cpu_info *ci)
1084 {
1085 	uint32_t vcpu_tversion;
1086 	const volatile vcpu_time_info_t *tinfo = &ci->ci_vcpu->time;
1087 
1088 	vcpu_tversion = tinfo->version;
1089 	while (tinfo->version == vcpu_tversion); /* Wait for a time update. XXX: timeout ? */
1090 
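	/*
	 * Xen reports time as ns = ((tsc << tsc_shift) * tsc_to_system_mul) >> 32,
	 * so the TSC frequency in Hz is (10^9 << 32) / tsc_to_system_mul,
	 * shifted back by tsc_shift, which is what the code below computes.
	 */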
1091 	uint64_t freq = 1000000000ULL << 32;
1092 	freq = freq / (uint64_t)tinfo->tsc_to_system_mul;
1093 	if (tinfo->tsc_shift < 0)
1094 		freq = freq << -tinfo->tsc_shift;
1095 	else
1096 		freq = freq >> tinfo->tsc_shift;
1097 	ci->ci_data.cpu_cc_freq = freq;
1098 }
1099 
1100 /*
1101  * Load the given (user) pmap on the current CPU.
1102  */
1103 void
1104 cpu_load_pmap(struct pmap *pmap, struct pmap *oldpmap)
1105 {
1106 	struct cpu_info *ci = curcpu();
1107 	cpuid_t cid = cpu_index(ci);
1108 	int i;
1109 
1110 	KASSERT(pmap != pmap_kernel());
1111 
1112 	mutex_enter(&ci->ci_kpm_mtx);
1113 	/* make new pmap visible to xen_kpm_sync() */
1114 	kcpuset_atomic_set(pmap->pm_xen_ptp_cpus, cid);
1115 
1116 #ifdef __x86_64__
1117 	pd_entry_t *new_pgd;
1118 	paddr_t l4_pd_ma;
1119 
1120 	l4_pd_ma = xpmap_ptom_masked(ci->ci_kpm_pdirpa);
1121 
1122 	/*
1123 	 * Map the user address space into the per-cpu L4 page and
1124 	 * load the user cr3.
1125 	 */
1126 	new_pgd = pmap->pm_pdir;
1127 	KASSERT(pmap == ci->ci_pmap);
1128 
1129 	/* Copy user pmap L4 PDEs (in user addr. range) to per-cpu L4 */
1130 	for (i = 0; i < PDIR_SLOT_USERLIM; i++) {
1131 		KASSERT(pmap != pmap_kernel() || new_pgd[i] == 0);
1132 		if (ci->ci_kpm_pdir[i] != new_pgd[i]) {
1133 			xpq_queue_pte_update(l4_pd_ma + i * sizeof(pd_entry_t),
1134 			    new_pgd[i]);
1135 		}
1136 	}
1137 
1138 	xen_set_user_pgd(pmap_pdirpa(pmap, 0));
1139 	ci->ci_xen_current_user_pgd = pmap_pdirpa(pmap, 0);
1140 #else
1141 	paddr_t l3_pd = xpmap_ptom_masked(ci->ci_pae_l3_pdirpa);
1142 	/* don't update the kernel L3 slot */
1143 	for (i = 0; i < PDP_SIZE - 1; i++) {
1144 		xpq_queue_pte_update(l3_pd + i * sizeof(pd_entry_t),
1145 		    xpmap_ptom(pmap->pm_pdirpa[i]) | PTE_P);
1146 	}
1147 #endif
1148 
1149 	tlbflush();
1150 
1151 	/* old pmap no longer visible to xen_kpm_sync() */
1152 	if (oldpmap != pmap_kernel()) {
1153 		kcpuset_atomic_clear(oldpmap->pm_xen_ptp_cpus, cid);
1154 	}
1155 	mutex_exit(&ci->ci_kpm_mtx);
1156 }
1157 
1158 /*
1159  * pmap_cpu_init_late: perform late per-CPU initialization.
1160  *
1161  * Short note about percpu PDIR pages. Both the PAE and __x86_64__ architectures
1162  * have per-cpu PDIR tables, for two different reasons:
1163  *  - on PAE, this is to get around Xen's pagetable setup constraints (multiple
1164  *    L3[3]s cannot point to the same L2 - Xen will refuse to pin a table set up
1165  *    this way).
1166  *  - on __x86_64__, this is for multiple CPUs to map in different user pmaps
1167  *    (see cpu_load_pmap()).
1168  *
1169  * What this means for us is that the PDIR of the pmap_kernel() is considered
1170  * to be a canonical "SHADOW" PDIR with the following properties:
1171  *  - its recursive mapping points to itself
1172  *  - per-cpu recursive mappings point to themselves on __x86_64__
1173  *  - per-cpu L4 pages' kernel entries are expected to be in sync with
1174  *    the shadow
1175  */
1176 
1177 void
1178 pmap_cpu_init_late(struct cpu_info *ci)
1179 {
1180 	int i;
1181 
1182 	/*
1183 	 * The BP already has its own PD page, allocated during early
1184 	 * MD startup.
1185 	 */
1186 
1187 #ifdef __x86_64__
1188 	/* Setup per-cpu normal_pdes */
1189 	extern pd_entry_t * const normal_pdes[];
1190 	for (i = 0; i < PTP_LEVELS - 1; i++) {
1191 		ci->ci_normal_pdes[i] = normal_pdes[i];
1192 	}
1193 #endif
1194 
1195 	if (ci == &cpu_info_primary)
1196 		return;
1197 
1198 	KASSERT(ci != NULL);
1199 
1200 #if defined(i386)
1201 	cpu_alloc_l3_page(ci);
1202 	KASSERT(ci->ci_pae_l3_pdirpa != 0);
1203 
1204 	/* Initialise L3 entries 0 - 2: point them at pmap_kernel()'s L2 pages */
1205 	for (i = 0; i < PDP_SIZE - 1; i++) {
1206 		ci->ci_pae_l3_pdir[i] =
1207 		    xpmap_ptom_masked(pmap_kernel()->pm_pdirpa[i]) | PTE_P;
1208 	}
1209 #endif
1210 
1211 	ci->ci_kpm_pdir = (pd_entry_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
1212 	    UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_NOWAIT);
1213 
1214 	if (ci->ci_kpm_pdir == NULL) {
1215 		panic("%s: failed to allocate L4 per-cpu PD for CPU %d\n",
1216 		    __func__, cpu_index(ci));
1217 	}
1218 	ci->ci_kpm_pdirpa = vtophys((vaddr_t)ci->ci_kpm_pdir);
1219 	KASSERT(ci->ci_kpm_pdirpa != 0);
1220 
1221 #ifdef __x86_64__
1222 	extern pt_entry_t xpmap_pg_nx;
1223 
1224 	/* Copy over the pmap_kernel() shadow L4 entries */
1225 	memcpy(ci->ci_kpm_pdir, pmap_kernel()->pm_pdir, PAGE_SIZE);
1226 
1227 	/* Recursive kernel mapping */
1228 	ci->ci_kpm_pdir[PDIR_SLOT_PTE] = xpmap_ptom_masked(ci->ci_kpm_pdirpa)
1229 	    | PTE_P | xpmap_pg_nx;
1230 #else
1231 	/* Copy over the pmap_kernel() shadow L2 entries */
1232 	memcpy(ci->ci_kpm_pdir, pmap_kernel()->pm_pdir + PDIR_SLOT_KERN,
1233 	    nkptp[PTP_LEVELS - 1] * sizeof(pd_entry_t));
1234 #endif
1235 
1236 	/* Xen wants a RO pdir. */
1237 	pmap_protect(pmap_kernel(), (vaddr_t)ci->ci_kpm_pdir,
1238 	    (vaddr_t)ci->ci_kpm_pdir + PAGE_SIZE, VM_PROT_READ);
1239 	pmap_update(pmap_kernel());
1240 
1241 #ifdef __x86_64__
1242 	xpq_queue_pin_l4_table(xpmap_ptom_masked(ci->ci_kpm_pdirpa));
1243 #else
1244 	/*
1245 	 * Initialize L3 entry 3. This mapping is shared across all pmaps and is
1246 	 * static, i.e., loading a new pmap will not update this entry.
1247 	 */
1248 	ci->ci_pae_l3_pdir[3] = xpmap_ptom_masked(ci->ci_kpm_pdirpa) | PTE_P;
1249 
1250 	/* Xen wants a RO L3. */
1251 	pmap_protect(pmap_kernel(), (vaddr_t)ci->ci_pae_l3_pdir,
1252 	    (vaddr_t)ci->ci_pae_l3_pdir + PAGE_SIZE, VM_PROT_READ);
1253 	pmap_update(pmap_kernel());
1254 
1255 	xpq_queue_pin_l3_table(xpmap_ptom_masked(ci->ci_pae_l3_pdirpa));
1256 #endif
1257 }
1258 
1259 /*
1260  * Notify all other cpus to halt.
1261  */
1262 
1263 void
1264 cpu_broadcast_halt(void)
1265 {
1266 	xen_broadcast_ipi(XEN_IPI_HALT);
1267 }
1268 
1269 /*
1270  * Send an IPI to a CPU and raise an AST on its running LWP.
1271  */
1272 
1273 void
1274 cpu_kick(struct cpu_info *ci)
1275 {
1276 	(void)xen_send_ipi(ci, XEN_IPI_AST);
1277 }
1278