1 /* $NetBSD: xen_machdep.c,v 1.29 2023/10/17 10:24:11 riastradh Exp $ */
2
3 /*
4 * Copyright (c) 2006 Manuel Bouyer.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 */
27
28 /*
29 *
30 * Copyright (c) 2004 Christian Limpach.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
43 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
44 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
45 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
46 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
47 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
51 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54
55 #include <sys/cdefs.h>
56 __KERNEL_RCSID(0, "$NetBSD: xen_machdep.c,v 1.29 2023/10/17 10:24:11 riastradh Exp $");
57
58 #include "opt_xen.h"
59
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/boot_flag.h>
63 #include <sys/conf.h>
64 #include <sys/disk.h>
65 #include <sys/device.h>
66 #include <sys/mount.h>
67 #include <sys/reboot.h>
68 #include <sys/timetc.h>
69 #include <sys/sysctl.h>
70 #include <sys/pmf.h>
71 #include <sys/xcall.h>
72
73 #include <dev/cons.h>
74
75 #include <xen/intr.h>
76 #include <xen/hypervisor.h>
77 #include <xen/shutdown_xenbus.h>
78 #include <xen/include/public/version.h>
79
80 #include <machine/pmap_private.h>
81
/*
 * DPRINTK(x): debug output through printk() (defined later in this file).
 * The argument is a complete, parenthesized printf-style argument list,
 * hence the double parentheses at call sites: DPRINTK(("fmt\n", args)).
 */
#define DPRINTK(x) printk x
/* Disabled no-op variant of DPRINTK, kept for reference. */
#if 0
#define DPRINTK(x)
#endif

/*
 * DPRINTF(a): same calling convention as DPRINTK, but expands to printf
 * and only when DEBUG_GEOM is defined; otherwise it expands to nothing.
 */
#ifdef DEBUG_GEOM
#define DPRINTF(a) printf a
#else
#define DPRINTF(a)
#endif
92
93
/*
 * Gate for the suspend sysctl: sysctl_xen_suspend() returns EAGAIN while
 * this is false, and xen_prepare_resume() resets it to false after a
 * resume.  Nothing in this file sets it to true.
 * NOTE(review): presumably set by the xenbus shutdown handling when dom0
 * requests a suspend -- confirm against the code that writes it.
 */
bool xen_suspend_allow;
95
96 void
xen_parse_cmdline(int what,union xen_cmdline_parseinfo * xcp)97 xen_parse_cmdline(int what, union xen_cmdline_parseinfo *xcp)
98 {
99 char _cmd_line[256], *cmd_line, *opt, *s;
100 int b, i, ipidx = 0;
101 uint32_t xi_ip[5];
102 size_t len;
103
104 len = strlcpy(_cmd_line, xen_start_info.cmd_line, sizeof(_cmd_line));
105 if (len > sizeof(_cmd_line)) {
106 printf("command line exceeded limit of 255 chars. Truncated.\n");
107 }
108 cmd_line = _cmd_line;
109
110 switch (what) {
111 case XEN_PARSE_BOOTDEV:
112 xcp->xcp_bootdev[0] = 0;
113 break;
114 case XEN_PARSE_CONSOLE:
115 xcp->xcp_console[0] = 0;
116 break;
117 }
118
119 while (cmd_line && *cmd_line) {
120 opt = cmd_line;
121 cmd_line = strchr(opt, ' ');
122 if (cmd_line)
123 *cmd_line = 0;
124
125 switch (what) {
126 case XEN_PARSE_BOOTDEV:
127 if (strncasecmp(opt, "bootdev=", 8) == 0) {
128 strncpy(xcp->xcp_bootdev, opt + 8,
129 sizeof(xcp->xcp_bootdev));
130 break;
131 }
132 if (strncasecmp(opt, "root=", 5) == 0) {
133 strncpy(xcp->xcp_bootdev, opt + 5,
134 sizeof(xcp->xcp_bootdev));
135 break;
136 }
137 break;
138
139 case XEN_PARSE_NETINFO:
140 if (xcp->xcp_netinfo.xi_root &&
141 strncasecmp(opt, "nfsroot=", 8) == 0)
142 strncpy(xcp->xcp_netinfo.xi_root, opt + 8,
143 MNAMELEN);
144
145 if (strncasecmp(opt, "ip=", 3) == 0) {
146 memset(xi_ip, 0, sizeof(xi_ip));
147 opt += 3;
148 ipidx = 0;
149 while (opt && *opt) {
150 s = opt;
151 opt = strchr(opt, ':');
152 if (opt)
153 *opt = 0;
154
155 switch (ipidx) {
156 case 0: /* ip */
157 case 1: /* nfs server */
158 case 2: /* gw */
159 case 3: /* mask */
160 case 4: /* host */
161 if (*s == 0)
162 break;
163 for (i = 0; i < 4; i++) {
164 b = strtoul(s, &s, 10);
165 xi_ip[ipidx] = b + 256
166 * xi_ip[ipidx];
167 if (*s != '.')
168 break;
169 s++;
170 }
171 if (i < 3)
172 xi_ip[ipidx] = 0;
173 break;
174 case 5: /* interface */
175 if (!strncmp(s, "xennet", 6))
176 s += 6;
177 else if (!strncmp(s, "eth", 3))
178 s += 3;
179 else
180 break;
181 if (xcp->xcp_netinfo.xi_ifno
182 == strtoul(s, NULL, 10))
183 memcpy(xcp->
184 xcp_netinfo.xi_ip,
185 xi_ip,
186 sizeof(xi_ip));
187 break;
188 }
189 ipidx++;
190
191 if (opt)
192 *opt++ = ':';
193 }
194 }
195 break;
196
197 case XEN_PARSE_CONSOLE:
198 if (strncasecmp(opt, "console=", 8) == 0)
199 strncpy(xcp->xcp_console, opt + 8,
200 sizeof(xcp->xcp_console));
201 break;
202
203 case XEN_PARSE_BOOTFLAGS:
204 if (*opt == '-') {
205 opt++;
206 while(*opt != '\0') {
207 BOOT_FLAG(*opt, boothowto);
208 opt++;
209 }
210 }
211 break;
212 case XEN_PARSE_PCIBACK:
213 if (strncasecmp(opt, "pciback.hide=", 13) == 0)
214 strncpy(xcp->xcp_pcidevs, opt + 13,
215 sizeof(xcp->xcp_pcidevs));
216 break;
217 }
218
219 if (cmd_line)
220 *cmd_line++ = ' ';
221 }
222 }
223
224 #ifdef XENPV
225
/*
 * Forward declarations for the domain suspend/resume helpers defined
 * below (compiled only when XENPV is defined).
 */
static int sysctl_xen_suspend(SYSCTLFN_ARGS);
static void xen_suspend_domain(void);
static void xen_prepare_suspend(void);
static void xen_prepare_resume(void);
230
231 /*
232 * this function sets up the machdep.xen.suspend sysctl(7) that
233 * controls domain suspend/save.
234 */
235 void
sysctl_xen_suspend_setup(void)236 sysctl_xen_suspend_setup(void)
237 {
238 const struct sysctlnode *node = NULL;
239
240 /*
241 * dom0 implements sleep support through ACPI. It should not call
242 * this function to register a suspend interface.
243 */
244 KASSERT(!(xendomain_is_dom0()));
245
246 sysctl_createv(NULL, 0, NULL, &node,
247 CTLFLAG_PERMANENT,
248 CTLTYPE_NODE, "machdep", NULL,
249 NULL, 0, NULL, 0,
250 CTL_MACHDEP, CTL_EOL);
251
252 sysctl_createv(NULL, 0, &node, &node,
253 CTLFLAG_PERMANENT,
254 CTLTYPE_NODE, "xen",
255 SYSCTL_DESCR("Xen top level node"),
256 NULL, 0, NULL, 0,
257 CTL_CREATE, CTL_EOL);
258
259 sysctl_createv(NULL, 0, &node, &node,
260 CTLFLAG_PERMANENT | CTLFLAG_READWRITE | CTLFLAG_IMMEDIATE,
261 CTLTYPE_INT, "suspend",
262 SYSCTL_DESCR("Suspend/save current Xen domain"),
263 sysctl_xen_suspend, 0, NULL, 0,
264 CTL_CREATE, CTL_EOL);
265 }
266
267 static int
sysctl_xen_suspend(SYSCTLFN_ARGS)268 sysctl_xen_suspend(SYSCTLFN_ARGS)
269 {
270 int error;
271 struct sysctlnode node;
272
273 node = *rnode;
274 error = sysctl_lookup(SYSCTLFN_CALL(&node));
275
276 if (error || newp == NULL)
277 return error;
278
279 /* only allow domain to suspend when dom0 instructed to do so */
280 if (xen_suspend_allow == false)
281 return EAGAIN;
282
283 xen_suspend_domain();
284
285 return 0;
286
287 }
288
/*
 * Cross-call callbacks handed to xc_broadcast() by xen_prepare_suspend()
 * and xen_prepare_resume(); both arguments are unused.
 */
static void xen_suspendclocks_xc(void *, void*);
static void xen_resumeclocks_xc(void *, void*);
291
/*
 * Last operations before suspending domain.
 *
 * Called from xen_suspend_domain() right before HYPERVISOR_suspend().
 * Kernel preemption is disabled here and deliberately left disabled on
 * return; the matching kpreempt_enable() is at the end of
 * xen_prepare_resume().  Any hypercall failure crashes the domain.
 */
static void
xen_prepare_suspend(void)
{

	/* stays disabled until xen_prepare_resume() re-enables it */
	kpreempt_disable();

	pmap_xen_suspend();
	/* run xen_suspendclocks_xc as a broadcast cross-call and wait for it */
	xc_wait(xc_broadcast(0, &xen_suspendclocks_xc, NULL, NULL));

	/*
	 * save/restore code does not translate these MFNs to their
	 * associated PFNs, so we must do it
	 */
	xen_start_info.store_mfn =
	    atop(xpmap_mtop(ptoa(xen_start_info.store_mfn)));
	xen_start_info.console_mfn =
	    atop(xpmap_mtop(ptoa(xen_start_info.console_mfn)));

	DPRINTK(("suspending domain\n"));
	aprint_verbose("suspending domain\n");

	/* invalidate the shared_info page */
	if (HYPERVISOR_update_va_mapping((vaddr_t)HYPERVISOR_shared_info,
	    0, UVMF_INVLPG)) {
		DPRINTK(("HYPERVISOR_shared_info page invalidation failed"));
		HYPERVISOR_crash();
	}

}
324
/*
 * Cross-call callback: suspend the Xen clocks of the CPU this runs on.
 * Neither argument is used.
 */
static void
xen_suspendclocks_xc(void *a, void *b)
{
	(void)a;
	(void)b;

	/* keep curcpu() stable for the duration of the call */
	kpreempt_disable();
	xen_suspendclocks(curcpu());
	kpreempt_enable();
}
333
/*
 * First operations before restoring domain context.
 *
 * Called from xen_suspend_domain() right after HYPERVISOR_suspend()
 * returns.  Mirrors xen_prepare_suspend(): remaps the shared_info page,
 * resumes the pmap and the clocks, clears xen_suspend_allow, and finally
 * re-enables the kernel preemption that xen_prepare_suspend() disabled.
 * A failed remap or a changed page count crashes the domain.
 */
static void
xen_prepare_resume(void)
{
	/* map the new shared_info page */
	if (HYPERVISOR_update_va_mapping((vaddr_t)HYPERVISOR_shared_info,
	    xen_start_info.shared_info | PTE_W | PTE_P,
	    UVMF_INVLPG)) {
		DPRINTK(("could not map new shared info page"));
		HYPERVISOR_crash();
	}

	pmap_xen_resume();

	if (xen_start_info.nr_pages != physmem) {
		/*
		 * XXX JYM for now, we crash - fix it with memory
		 * hotplug when supported
		 */
		DPRINTK(("xen_start_info.nr_pages != physmem"));
		HYPERVISOR_crash();
	}

	DPRINTK(("preparing domain resume\n"));
	aprint_verbose("preparing domain resume\n");

	/* a new suspend must be explicitly allowed again */
	xen_suspend_allow = false;

	/* run xen_resumeclocks_xc as a broadcast cross-call and wait for it */
	xc_wait(xc_broadcast(0, xen_resumeclocks_xc, NULL, NULL));

	/* pairs with the kpreempt_disable() in xen_prepare_suspend() */
	kpreempt_enable();

}
369
/*
 * Cross-call callback: resume the Xen clocks of the CPU this runs on.
 * Neither argument is used.
 */
static void
xen_resumeclocks_xc(void *a, void *b)
{
	(void)a;
	(void)b;

	/* keep curcpu() stable for the duration of the call */
	kpreempt_disable();
	xen_resumeclocks(curcpu());
	kpreempt_enable();
}
378
/*
 * Suspend the current domain and bring it back up once the hypervisor
 * resumes it.
 *
 * Sequence: raise to splvm, suspend devices through pmf, look up the
 * machine frame of xen_start_info, run xen_prepare_suspend(), issue
 * HYPERVISOR_suspend(), then on return run xen_prepare_resume(), resume
 * devices and restore the previous spl.  Every failure along the way
 * crashes the domain via HYPERVISOR_crash(); there is no error return.
 */
static void
xen_suspend_domain(void)
{
	paddr_t mfn;
	int s = splvm(); /* XXXSMP */

	/*
	 * console becomes unavailable when suspended, so
	 * direct communications to domain are hampered from there on.
	 * We can only rely on low level primitives like printk(), until
	 * console is fully restored
	 */
	if (!pmf_system_suspend(PMF_Q_NONE)) {
		DPRINTK(("devices suspend failed"));
		HYPERVISOR_crash();
	}

	/*
	 * obtain the MFN of the start_info page now, as we will not be
	 * able to do it once pmap is locked
	 */
	pmap_extract_ma(pmap_kernel(), (vaddr_t)&xen_start_info, &mfn);
	/* machine address -> machine frame number */
	mfn >>= PAGE_SHIFT;

	xen_prepare_suspend();

	DPRINTK(("calling HYPERVISOR_suspend()\n"));
	if (HYPERVISOR_suspend(mfn) != 0) {
		/* XXX JYM: implement checkpoint/snapshot (ret == 1) */
		DPRINTK(("HYPERVISOR_suspend() failed"));
		HYPERVISOR_crash();
	}

	/* execution continues here once the domain has been resumed */
	DPRINTK(("left HYPERVISOR_suspend()\n"));

	xen_prepare_resume();

	DPRINTK(("resuming devices\n"));
	if (!pmf_system_resume(PMF_Q_NONE)) {
		DPRINTK(("devices resume failed\n"));
		HYPERVISOR_crash();
	}

	splx(s);

	/* xencons is back online, we can print to console */
	aprint_verbose("domain resumed\n");

}
428 #endif /* XENPV */
429
430 #define PRINTK_BUFSIZE 1024
431 void
printk(const char * fmt,...)432 printk(const char *fmt, ...)
433 {
434 va_list ap;
435 int ret;
436 static char buf[PRINTK_BUFSIZE];
437
438 va_start(ap, fmt);
439 ret = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap);
440 va_end(ap);
441 buf[ret] = 0;
442 (void)HYPERVISOR_console_io(CONSOLEIO_write, ret, buf);
443 }
444
/* Callbacks of the early (fallback) console, defined below. */
static int early_xenconscn_getc(dev_t);
static void early_xenconscn_putc(dev_t, int);
static void early_xenconscn_pollc(dev_t, int);

/*
 * Console descriptor installed as cn_tab by xen_early_console().
 * Positional initializer: only the getc/putc/pollc callbacks are
 * provided, every other hook is NULL, and the device is NODEV with
 * priority CN_NORMAL.
 * NOTE(review): slot order is taken on trust from struct consdev in
 * <dev/cons.h> -- confirm if that structure changes.
 */
static struct consdev early_xencons = {
	NULL, NULL,
	early_xenconscn_getc, early_xenconscn_putc, early_xenconscn_pollc,
	NULL, NULL, NULL, NODEV, CN_NORMAL
};
454
455 void
xen_early_console(void)456 xen_early_console(void)
457 {
458 cn_tab = &early_xencons; /* fallback console */
459 }
460
/*
 * Early console input hook.  The early console has no input path: this
 * never returns, it spins forever.
 */
static int
early_xenconscn_getc(dev_t dev)
{
	for (;;)
		continue;
	/* NOTREACHED */
	return -1;
}
468
/*
 * Early console output hook: emit the single character c through
 * printk().  The dev argument is unused.
 */
static void
early_xenconscn_putc(dev_t dev, int c)
{
	(void)dev;

	printk("%c", c);
}
474
/*
 * Early console polling hook: intentionally a no-op.  Both arguments
 * are ignored.
 */
static void
early_xenconscn_pollc(dev_t dev, int on)
{
	(void)dev;
	(void)on;
}
/*
 * Flat table of Xen feature bits, filled in by xen_init_features():
 * entry [submap * 32 + bit] is true when that bit is set in the
 * corresponding feature submap reported by the hypervisor.
 */
bool xen_feature_tables[XENFEAT_NR_SUBMAPS * 32];
481
482 void
xen_init_features(void)483 xen_init_features(void)
484 {
485 xen_feature_info_t features;
486
487 for (int sm = 0; sm < XENFEAT_NR_SUBMAPS; sm++) {
488 features.submap_idx = sm;
489 if (HYPERVISOR_xen_version(XENVER_get_features, &features) < 0)
490 break;
491 for (int f = 0; f < 32; f++) {
492 xen_feature_tables[sm * 32 + f] =
493 (features.submap & (1 << f)) ? 1 : 0;
494 }
495 }
496 }
497
498 /*
499 * Attempt to find the device from which we were booted.
500 */
501
502 static int
is_valid_disk(device_t dv)503 is_valid_disk(device_t dv)
504 {
505 if (device_class(dv) != DV_DISK)
506 return (0);
507
508 return (device_is_a(dv, "dk") ||
509 device_is_a(dv, "sd") ||
510 device_is_a(dv, "wd") ||
511 device_is_a(dv, "ld") ||
512 device_is_a(dv, "ed") ||
513 device_is_a(dv, "xbd"));
514 }
515
516 void
xen_bootconf(void)517 xen_bootconf(void)
518 {
519 device_t dv;
520 deviter_t di;
521 union xen_cmdline_parseinfo xcp;
522 static char bootspecbuf[sizeof(xcp.xcp_bootdev)];
523
524 if (booted_device) {
525 DPRINTF(("%s: preset booted_device: %s\n", __func__, device_xname(booted_device)));
526 return;
527 }
528
529 xen_parse_cmdline(XEN_PARSE_BOOTDEV, &xcp);
530
531 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST);
532 dv != NULL;
533 dv = deviter_next(&di)) {
534 bool is_ifnet, is_disk;
535 const char *devname;
536
537 is_ifnet = (device_class(dv) == DV_IFNET);
538 is_disk = is_valid_disk(dv);
539 devname = device_xname(dv);
540
541 if (!is_ifnet && !is_disk)
542 continue;
543
544 if (is_disk && xcp.xcp_bootdev[0] == 0) {
545 booted_device = dv;
546 break;
547 }
548
549 if (strncmp(xcp.xcp_bootdev, devname, strlen(devname)))
550 continue;
551
552 if (is_disk && strlen(xcp.xcp_bootdev) > strlen(devname)) {
553 /* XXX check device_cfdata as in x86_autoconf.c? */
554 booted_partition = toupper(
555 xcp.xcp_bootdev[strlen(devname)]) - 'A';
556 DPRINTF(("%s: booted_partition: %d\n", __func__, booted_partition));
557 }
558
559 booted_device = dv;
560 booted_method = "bootinfo/bootdev";
561 break;
562 }
563 deviter_release(&di);
564
565 if (booted_device) {
566 DPRINTF(("%s: booted_device: %s\n", __func__, device_xname(booted_device)));
567 return;
568 }
569
570 /*
571 * not a boot device name, pass through to MI code
572 */
573 if (xcp.xcp_bootdev[0] != '\0') {
574 strlcpy(bootspecbuf, xcp.xcp_bootdev, sizeof(bootspecbuf));
575 bootspec = bootspecbuf;
576 booted_method = "bootinfo/bootspec";
577 DPRINTF(("%s: bootspec: %s\n", __func__, bootspec));
578 return;
579 }
580 }
581