xref: /netbsd-src/sys/arch/xen/xen/xen_machdep.c (revision c92238a8c9ca609b9c2c5db20d294e0d3039a32d)
1 /*	$NetBSD: xen_machdep.c,v 1.29 2023/10/17 10:24:11 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  */
27 
28 /*
29  *
30  * Copyright (c) 2004 Christian Limpach.
31  * All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
43  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
44  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
45  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
46  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
47  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
51  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52  */
53 
54 
55 #include <sys/cdefs.h>
56 __KERNEL_RCSID(0, "$NetBSD: xen_machdep.c,v 1.29 2023/10/17 10:24:11 riastradh Exp $");
57 
58 #include "opt_xen.h"
59 
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/boot_flag.h>
63 #include <sys/conf.h>
64 #include <sys/disk.h>
65 #include <sys/device.h>
66 #include <sys/mount.h>
67 #include <sys/reboot.h>
68 #include <sys/timetc.h>
69 #include <sys/sysctl.h>
70 #include <sys/pmf.h>
71 #include <sys/xcall.h>
72 
73 #include <dev/cons.h>
74 
75 #include <xen/intr.h>
76 #include <xen/hypervisor.h>
77 #include <xen/shutdown_xenbus.h>
78 #include <xen/include/public/version.h>
79 
80 #include <machine/pmap_private.h>
81 
/*
 * DPRINTK((fmt, ...)): debug output routed through printk().  The macro
 * argument is a complete, parenthesised printk() argument list.
 * Currently always enabled; change the condition below to 0 to compile
 * the messages out.
 */
#if 1
#define DPRINTK(x) printk x
#else
#define DPRINTK(x)
#endif

/*
 * DPRINTF((fmt, ...)): printf()-based tracing, compiled in only when
 * DEBUG_GEOM is defined.
 */
#ifndef DEBUG_GEOM
#define DPRINTF(a)
#else
#define DPRINTF(a) printf a
#endif


/*
 * Gate for the machdep.xen.suspend sysctl handler: a suspend request is
 * refused with EAGAIN while this is false.  It is reset to false on
 * resume; the code that sets it to true is not in this part of the file.
 */
bool xen_suspend_allow;
95 
96 void
xen_parse_cmdline(int what,union xen_cmdline_parseinfo * xcp)97 xen_parse_cmdline(int what, union xen_cmdline_parseinfo *xcp)
98 {
99 	char _cmd_line[256], *cmd_line, *opt, *s;
100 	int b, i, ipidx = 0;
101 	uint32_t xi_ip[5];
102 	size_t len;
103 
104 	len = strlcpy(_cmd_line, xen_start_info.cmd_line, sizeof(_cmd_line));
105 	if (len > sizeof(_cmd_line)) {
106 		printf("command line exceeded limit of 255 chars. Truncated.\n");
107 	}
108 	cmd_line = _cmd_line;
109 
110 	switch (what) {
111 	case XEN_PARSE_BOOTDEV:
112 		xcp->xcp_bootdev[0] = 0;
113 		break;
114 	case XEN_PARSE_CONSOLE:
115 		xcp->xcp_console[0] = 0;
116 		break;
117 	}
118 
119 	while (cmd_line && *cmd_line) {
120 		opt = cmd_line;
121 		cmd_line = strchr(opt, ' ');
122 		if (cmd_line)
123 			*cmd_line = 0;
124 
125 		switch (what) {
126 		case XEN_PARSE_BOOTDEV:
127 			if (strncasecmp(opt, "bootdev=", 8) == 0) {
128 				strncpy(xcp->xcp_bootdev, opt + 8,
129 				    sizeof(xcp->xcp_bootdev));
130 				break;
131 			}
132 			if (strncasecmp(opt, "root=", 5) == 0) {
133 				strncpy(xcp->xcp_bootdev, opt + 5,
134 				    sizeof(xcp->xcp_bootdev));
135 				break;
136 			}
137 			break;
138 
139 		case XEN_PARSE_NETINFO:
140 			if (xcp->xcp_netinfo.xi_root &&
141 			    strncasecmp(opt, "nfsroot=", 8) == 0)
142 				strncpy(xcp->xcp_netinfo.xi_root, opt + 8,
143 				    MNAMELEN);
144 
145 			if (strncasecmp(opt, "ip=", 3) == 0) {
146 				memset(xi_ip, 0, sizeof(xi_ip));
147 				opt += 3;
148 				ipidx = 0;
149 				while (opt && *opt) {
150 					s = opt;
151 					opt = strchr(opt, ':');
152 					if (opt)
153 						*opt = 0;
154 
155 					switch (ipidx) {
156 					case 0:	/* ip */
157 					case 1:	/* nfs server */
158 					case 2:	/* gw */
159 					case 3:	/* mask */
160 					case 4:	/* host */
161 						if (*s == 0)
162 							break;
163 						for (i = 0; i < 4; i++) {
164 							b = strtoul(s, &s, 10);
165 							xi_ip[ipidx] = b + 256
166 								* xi_ip[ipidx];
167 							if (*s != '.')
168 								break;
169 							s++;
170 						}
171 						if (i < 3)
172 							xi_ip[ipidx] = 0;
173 						break;
174 					case 5:	/* interface */
175 						if (!strncmp(s, "xennet", 6))
176 							s += 6;
177 						else if (!strncmp(s, "eth", 3))
178 							s += 3;
179 						else
180 							break;
181 						if (xcp->xcp_netinfo.xi_ifno
182 						    == strtoul(s, NULL, 10))
183 							memcpy(xcp->
184 							    xcp_netinfo.xi_ip,
185 							    xi_ip,
186 							    sizeof(xi_ip));
187 						break;
188 					}
189 					ipidx++;
190 
191 					if (opt)
192 						*opt++ = ':';
193 				}
194 			}
195 			break;
196 
197 		case XEN_PARSE_CONSOLE:
198 			if (strncasecmp(opt, "console=", 8) == 0)
199 				strncpy(xcp->xcp_console, opt + 8,
200 				    sizeof(xcp->xcp_console));
201 			break;
202 
203 		case XEN_PARSE_BOOTFLAGS:
204 			if (*opt == '-') {
205 				opt++;
206 				while(*opt != '\0') {
207 					BOOT_FLAG(*opt, boothowto);
208 					opt++;
209 				}
210 			}
211 			break;
212 		case XEN_PARSE_PCIBACK:
213 			if (strncasecmp(opt, "pciback.hide=", 13) == 0)
214 				strncpy(xcp->xcp_pcidevs, opt + 13,
215 				    sizeof(xcp->xcp_pcidevs));
216 			break;
217 		}
218 
219 		if (cmd_line)
220 			*cmd_line++ = ' ';
221 	}
222 }
223 
224 #ifdef XENPV
225 
226 static int sysctl_xen_suspend(SYSCTLFN_ARGS);
227 static void xen_suspend_domain(void);
228 static void xen_prepare_suspend(void);
229 static void xen_prepare_resume(void);
230 
231 /*
232  * this function sets up the machdep.xen.suspend sysctl(7) that
233  * controls domain suspend/save.
234  */
235 void
sysctl_xen_suspend_setup(void)236 sysctl_xen_suspend_setup(void)
237 {
238 	const struct sysctlnode *node = NULL;
239 
240 	/*
241 	 * dom0 implements sleep support through ACPI. It should not call
242 	 * this function to register a suspend interface.
243 	 */
244 	KASSERT(!(xendomain_is_dom0()));
245 
246 	sysctl_createv(NULL, 0, NULL, &node,
247 	    CTLFLAG_PERMANENT,
248 	    CTLTYPE_NODE, "machdep", NULL,
249 	    NULL, 0, NULL, 0,
250 	    CTL_MACHDEP, CTL_EOL);
251 
252 	sysctl_createv(NULL, 0, &node, &node,
253 	    CTLFLAG_PERMANENT,
254 	    CTLTYPE_NODE, "xen",
255 	    SYSCTL_DESCR("Xen top level node"),
256 	    NULL, 0, NULL, 0,
257 	    CTL_CREATE, CTL_EOL);
258 
259 	sysctl_createv(NULL, 0, &node, &node,
260 	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE | CTLFLAG_IMMEDIATE,
261 	    CTLTYPE_INT, "suspend",
262 	    SYSCTL_DESCR("Suspend/save current Xen domain"),
263 	    sysctl_xen_suspend, 0, NULL, 0,
264 	    CTL_CREATE, CTL_EOL);
265 }
266 
267 static int
sysctl_xen_suspend(SYSCTLFN_ARGS)268 sysctl_xen_suspend(SYSCTLFN_ARGS)
269 {
270 	int error;
271 	struct sysctlnode node;
272 
273 	node = *rnode;
274 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
275 
276 	if (error || newp == NULL)
277 		return error;
278 
279 	/* only allow domain to suspend when dom0 instructed to do so */
280 	if (xen_suspend_allow == false)
281 		return EAGAIN;
282 
283 	xen_suspend_domain();
284 
285 	return 0;
286 
287 }
288 
289 static void xen_suspendclocks_xc(void *, void*);
290 static void xen_resumeclocks_xc(void *, void*);
291 
292 /*
293  * Last operations before suspending domain
294  */
295 static void
xen_prepare_suspend(void)296 xen_prepare_suspend(void)
297 {
298 
299 	kpreempt_disable();
300 
301 	pmap_xen_suspend();
302 	xc_wait(xc_broadcast(0, &xen_suspendclocks_xc, NULL, NULL));
303 
304 	/*
305 	 * save/restore code does not translate these MFNs to their
306 	 * associated PFNs, so we must do it
307 	 */
308 	xen_start_info.store_mfn =
309 	    atop(xpmap_mtop(ptoa(xen_start_info.store_mfn)));
310 	xen_start_info.console_mfn =
311 	    atop(xpmap_mtop(ptoa(xen_start_info.console_mfn)));
312 
313 	DPRINTK(("suspending domain\n"));
314 	aprint_verbose("suspending domain\n");
315 
316 	/* invalidate the shared_info page */
317 	if (HYPERVISOR_update_va_mapping((vaddr_t)HYPERVISOR_shared_info,
318 	    0, UVMF_INVLPG)) {
319 		DPRINTK(("HYPERVISOR_shared_info page invalidation failed"));
320 		HYPERVISOR_crash();
321 	}
322 
323 }
324 
/*
 * Cross-call handler: suspend the Xen clocks of the CPU it runs on.
 * Both arguments are unused.
 */
static void
xen_suspendclocks_xc(void *a, void *b)
{
	struct cpu_info *ci;

	/* Stay on this CPU while its clocks are being suspended. */
	kpreempt_disable();
	ci = curcpu();
	xen_suspendclocks(ci);
	kpreempt_enable();
}
333 
334 /*
335  * First operations before restoring domain context
336  */
337 static void
xen_prepare_resume(void)338 xen_prepare_resume(void)
339 {
340 	/* map the new shared_info page */
341 	if (HYPERVISOR_update_va_mapping((vaddr_t)HYPERVISOR_shared_info,
342 	    xen_start_info.shared_info | PTE_W | PTE_P,
343 	    UVMF_INVLPG)) {
344 		DPRINTK(("could not map new shared info page"));
345 		HYPERVISOR_crash();
346 	}
347 
348 	pmap_xen_resume();
349 
350 	if (xen_start_info.nr_pages != physmem) {
351 		/*
352 		 * XXX JYM for now, we crash - fix it with memory
353 		 * hotplug when supported
354 		 */
355 		DPRINTK(("xen_start_info.nr_pages != physmem"));
356 		HYPERVISOR_crash();
357 	}
358 
359 	DPRINTK(("preparing domain resume\n"));
360 	aprint_verbose("preparing domain resume\n");
361 
362 	xen_suspend_allow = false;
363 
364 	xc_wait(xc_broadcast(0, xen_resumeclocks_xc, NULL, NULL));
365 
366 	kpreempt_enable();
367 
368 }
369 
/*
 * Cross-call handler: resume the Xen clocks of the CPU it runs on.
 * Both arguments are unused.
 */
static void
xen_resumeclocks_xc(void *a, void *b)
{
	struct cpu_info *ci;

	/* Stay on this CPU while its clocks are being resumed. */
	kpreempt_disable();
	ci = curcpu();
	xen_resumeclocks(ci);
	kpreempt_enable();
}
378 
379 static void
xen_suspend_domain(void)380 xen_suspend_domain(void)
381 {
382 	paddr_t mfn;
383 	int s = splvm(); /* XXXSMP */
384 
385 	/*
386 	 * console becomes unavailable when suspended, so
387 	 * direct communications to domain are hampered from there on.
388 	 * We can only rely on low level primitives like printk(), until
389 	 * console is fully restored
390 	 */
391 	if (!pmf_system_suspend(PMF_Q_NONE)) {
392 		DPRINTK(("devices suspend failed"));
393 		HYPERVISOR_crash();
394 	}
395 
396 	/*
397 	 * obtain the MFN of the start_info page now, as we will not be
398 	 * able to do it once pmap is locked
399 	 */
400 	pmap_extract_ma(pmap_kernel(), (vaddr_t)&xen_start_info, &mfn);
401 	mfn >>= PAGE_SHIFT;
402 
403 	xen_prepare_suspend();
404 
405 	DPRINTK(("calling HYPERVISOR_suspend()\n"));
406 	if (HYPERVISOR_suspend(mfn) != 0) {
407 	/* XXX JYM: implement checkpoint/snapshot (ret == 1) */
408 		DPRINTK(("HYPERVISOR_suspend() failed"));
409 		HYPERVISOR_crash();
410 	}
411 
412 	DPRINTK(("left HYPERVISOR_suspend()\n"));
413 
414 	xen_prepare_resume();
415 
416 	DPRINTK(("resuming devices\n"));
417 	if (!pmf_system_resume(PMF_Q_NONE)) {
418 		DPRINTK(("devices resume failed\n"));
419 		HYPERVISOR_crash();
420 	}
421 
422 	splx(s);
423 
424 	/* xencons is back online, we can print to console */
425 	aprint_verbose("domain resumed\n");
426 
427 }
428 #endif /* XENPV */
429 
430 #define PRINTK_BUFSIZE 1024
431 void
printk(const char * fmt,...)432 printk(const char *fmt, ...)
433 {
434 	va_list ap;
435 	int ret;
436 	static char buf[PRINTK_BUFSIZE];
437 
438 	va_start(ap, fmt);
439 	ret = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap);
440 	va_end(ap);
441 	buf[ret] = 0;
442 	(void)HYPERVISOR_console_io(CONSOLEIO_write, ret, buf);
443 }
444 
445 static int early_xenconscn_getc(dev_t);
446 static void early_xenconscn_putc(dev_t, int);
447 static void early_xenconscn_pollc(dev_t, int);
448 
449 static struct consdev early_xencons = {
450 	NULL, NULL,
451 	early_xenconscn_getc, early_xenconscn_putc, early_xenconscn_pollc,
452 	NULL, NULL, NULL, NODEV, CN_NORMAL
453 };
454 
455 void
xen_early_console(void)456 xen_early_console(void)
457 {
458 	cn_tab = &early_xencons; /* fallback console */
459 }
460 
/*
 * Input handler of the early console.  There is no input path: the
 * function spins forever and never returns to its caller.
 */
static int
early_xenconscn_getc(dev_t dev)
{
	for (;;)
		continue;

	/* Not reached. */
	return -1;
}
468 
/*
 * Output handler of the early console: emit the single character `c'
 * through printk().  `dev' is unused.
 */
static void
early_xenconscn_putc(dev_t dev, int c)
{
	const int ch = c;

	printk("%c", ch);
}
474 
/*
 * Polling-mode handler of the early console: intentionally a no-op.
 * Both arguments are ignored.
 */
static void
early_xenconscn_pollc(dev_t dev, int on)
{
	(void)dev;
	(void)on;
}
480 bool xen_feature_tables[XENFEAT_NR_SUBMAPS * 32];
481 
482 void
xen_init_features(void)483 xen_init_features(void)
484 {
485 	xen_feature_info_t features;
486 
487 	for (int sm = 0; sm < XENFEAT_NR_SUBMAPS; sm++) {
488 		features.submap_idx = sm;
489 		if (HYPERVISOR_xen_version(XENVER_get_features, &features) < 0)
490 			break;
491 		for (int f = 0; f < 32; f++) {
492 			xen_feature_tables[sm * 32 + f] =
493 			    (features.submap & (1 << f)) ? 1 : 0;
494 		}
495 	}
496 }
497 
498 /*
499  * Attempt to find the device from which we were booted.
500  */
501 
502 static int
is_valid_disk(device_t dv)503 is_valid_disk(device_t dv)
504 {
505 	if (device_class(dv) != DV_DISK)
506 		return (0);
507 
508 	return (device_is_a(dv, "dk") ||
509 		device_is_a(dv, "sd") ||
510 		device_is_a(dv, "wd") ||
511 		device_is_a(dv, "ld") ||
512 		device_is_a(dv, "ed") ||
513 		device_is_a(dv, "xbd"));
514 }
515 
516 void
xen_bootconf(void)517 xen_bootconf(void)
518 {
519 	device_t dv;
520 	deviter_t di;
521 	union xen_cmdline_parseinfo xcp;
522 	static char bootspecbuf[sizeof(xcp.xcp_bootdev)];
523 
524 	if (booted_device) {
525 		DPRINTF(("%s: preset booted_device: %s\n", __func__, device_xname(booted_device)));
526 		return;
527 	}
528 
529 	xen_parse_cmdline(XEN_PARSE_BOOTDEV, &xcp);
530 
531 	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST);
532 	     dv != NULL;
533 	     dv = deviter_next(&di)) {
534 		bool is_ifnet, is_disk;
535 		const char *devname;
536 
537 		is_ifnet = (device_class(dv) == DV_IFNET);
538 		is_disk = is_valid_disk(dv);
539 		devname = device_xname(dv);
540 
541 		if (!is_ifnet && !is_disk)
542 			continue;
543 
544 		if (is_disk && xcp.xcp_bootdev[0] == 0) {
545 			booted_device = dv;
546 			break;
547 		}
548 
549 		if (strncmp(xcp.xcp_bootdev, devname, strlen(devname)))
550 			continue;
551 
552 		if (is_disk && strlen(xcp.xcp_bootdev) > strlen(devname)) {
553 			/* XXX check device_cfdata as in x86_autoconf.c? */
554 			booted_partition = toupper(
555 				xcp.xcp_bootdev[strlen(devname)]) - 'A';
556 			DPRINTF(("%s: booted_partition: %d\n", __func__, booted_partition));
557 		}
558 
559 		booted_device = dv;
560 		booted_method = "bootinfo/bootdev";
561 		break;
562 	}
563 	deviter_release(&di);
564 
565 	if (booted_device) {
566 		DPRINTF(("%s: booted_device: %s\n", __func__, device_xname(booted_device)));
567 		return;
568 	}
569 
570 	/*
571 	 * not a boot device name, pass through to MI code
572 	 */
573 	if (xcp.xcp_bootdev[0] != '\0') {
574 		strlcpy(bootspecbuf, xcp.xcp_bootdev, sizeof(bootspecbuf));
575 		bootspec = bootspecbuf;
576 		booted_method = "bootinfo/bootspec";
577 		DPRINTF(("%s: bootspec: %s\n", __func__, bootspec));
578 		return;
579 	}
580 }
581