xref: /openbsd-src/usr.sbin/ldomctl/config.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: config.c,v 1.34 2020/02/21 19:39:28 kn Exp $	*/
2 
3 /*
4  * Copyright (c) 2012, 2018 Mark Kettenis
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/queue.h>
21 #include <assert.h>
22 #include <err.h>
23 #include <stdarg.h>
24 #include <stdbool.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 
29 #include "mdesc.h"
30 #include "ldomctl.h"
31 #include "ldom_util.h"
32 
/*
 * Small integer codes with an LDC_ prefix.  Their users are not in this
 * part of the file; presumably the first group names the kinds of LDC
 * endpoint target and the second group names service ids -- TODO confirm.
 */
#define LDC_GUEST	0
#define LDC_HV		1
#define LDC_SP		2

#define LDC_HVCTL_SVC	1
#define LDC_CONSOLE_SVC	2
39 
/* Number of guest slots tracked for each core. */
#define MAX_STRANDS_PER_CORE	16

/*
 * One core.  guests[] records which guest has claimed each slot;
 * hvmd_finalize_cpu() assigns slot index + 1 as a CPU's partid.
 */
struct core {
	struct guest *guests[MAX_STRANDS_PER_CORE];
	TAILQ_ENTRY(core) link;
};

/* Every core created by pri_add_core(); reset by pri_init_cores(). */
TAILQ_HEAD(, core) cores;
48 
/*
 * An entry on the components list.  pri_init_components() creates one
 * for every "component" node that has an "assignable-path" property.
 */
struct component {
	const char *path;	/* value of "assignable-path" */
	int assigned;		/* zero when created */

	struct md_node *hv_node;	/* NULL when created */
	TAILQ_ENTRY(component) link;
};

/* Reset and populated by pri_init_components(). */
TAILQ_HEAD(, component) components;
58 
/*
 * A "component" node of type "hostbridge", as recorded by
 * pri_add_hostbridge().
 */
struct hostbridge {
	const char *path;	/* "path" property */

	uint64_t num_msi_eqs;	/* "#msi-eqs" property */
	uint64_t num_msis;	/* "#msi" property */
	uint64_t max_vpcis;	/* "#max-vpcis" property, 10 if absent */
	TAILQ_ENTRY(hostbridge) link;
};

/* Reset by pri_init_components(), filled by pri_add_hostbridge(). */
TAILQ_HEAD(, hostbridge) hostbridges;
69 
/*
 * One fragment on the free_frags list, identified by its base address.
 * Entries are created by hvmd_init_frag() and hvmd_free_frag() and
 * consumed by hvmd_alloc_frag().
 */
struct frag {
	TAILQ_ENTRY(frag) link;
	uint64_t base;
};
74 
/*
 * Lookup tables indexed by resource_id.  pri_init() allocates them:
 * guests and consoles hold max_guests slots, cpus holds max_cpus,
 * pcie_busses and network_devices hold max_devices, mblocks holds
 * max_mblocks and ldc_endpoints holds max_guest_ldcs.
 */
struct guest **guests;
struct console **consoles;
struct cpu **cpus;
struct device **pcie_busses;
struct device **network_devices;
struct mblock **mblocks;
struct ldc_endpoint **ldc_endpoints;

/* PCIe root complexes collected by pri_init_phys_io(). */
TAILQ_HEAD(, rootcomplex) rootcomplexes;

/* "max-cpus" from the "platform" node; read by pri_init(). */
uint64_t max_cpus;
/* Whether the hypervisor MD has a "cwqs" / "rngs" node; set by hvmd_init(). */
bool have_cwqs;
bool have_rngs;

/*
 * Limits read from the "firmware" node by pri_init(), except
 * md_maxsize, which hvmd_init() reads from the "hvmd_mblock" node.
 */
uint64_t max_guests;
uint64_t max_hv_ldcs;
uint64_t max_guest_ldcs;
uint64_t md_maxsize;
uint64_t md_elbow_room;
uint64_t max_mblocks;
uint64_t directio_capability;

/* Size of the pcie_busses and network_devices tables. */
uint64_t max_devices = 16;

/*
 * rombase and romsize describe the guest-usable image found under
 * "read_only_memory" by pri_init(); uartbase is read from the "root"
 * node by hvmd_init().
 */
uint64_t rombase;
uint64_t romsize;
uint64_t uartbase;

/* Largest page size computed by pri_add_cpu() over all CPUs. */
uint64_t max_page_size;

/* Properties of the "root" node, read by hvmd_init(). */
uint64_t content_version;
uint64_t stick_frequency;
uint64_t tod_frequency;
uint64_t tod;
uint64_t erpt_pa;
uint64_t erpt_size;

/*
 * Machine descriptions.  protomd is the trimmed copy that pri_init()
 * makes of its argument.  pri and hvmd are assigned outside this part
 * of the file.
 */
struct md *pri;
struct md *hvmd;
struct md *protomd;
115 
/*
 * Forward declarations of guest helpers whose definitions are not in
 * this part of the file.
 */
struct guest *guest_lookup(const char *);
void guest_prune_phys_io(struct guest *);
void guest_prune_pcie(struct guest *, struct md_node *, const char *);
void guest_add_vpcie(struct guest *, uint64_t);
void guest_fixup_phys_io(struct guest *);
121 
/* Fragments available to hvmd_alloc_frag(). */
TAILQ_HEAD(, frag) free_frags = TAILQ_HEAD_INITIALIZER(free_frags);

/* CPUs not yet handed out by pri_alloc_cpu(). */
TAILQ_HEAD(, cpu) free_cpus = TAILQ_HEAD_INITIALIZER(free_cpus);
/* Incremented once per CPU by pri_add_cpu(). */
int total_cpus;

/* Memory not yet handed out by pri_alloc_memory(). */
TAILQ_HEAD(, mblock) free_memory = TAILQ_HEAD_INITIALIZER(free_memory);
/* Bytes added by pri_add_mblock() minus bytes taken by pri_alloc_memory(). */
uint64_t total_memory;
127 
128 struct cpu *
129 pri_find_cpu(uint64_t pid)
130 {
131 	struct cpu *cpu = NULL;
132 
133 	TAILQ_FOREACH(cpu, &free_cpus, link) {
134 		if (cpu->pid == pid)
135 			break;
136 	}
137 
138 	return cpu;
139 }
140 
141 void
142 pri_link_core(struct md *md, struct md_node *node, struct core *core)
143 {
144 	struct md_node *node2;
145 	struct md_prop *prop;
146 	struct cpu *cpu;
147 	uint64_t pid;
148 
149 	TAILQ_FOREACH(prop, &node->prop_list, link) {
150 		if (prop->tag == MD_PROP_ARC &&
151 		    strcmp(prop->name->str, "back") == 0) {
152 			node2 = prop->d.arc.node;
153 			if (strcmp(node2->name->str, "cpu") != 0) {
154 				pri_link_core(md, node2, core);
155 				continue;
156 			}
157 
158 			pid = -1;
159 			if (!md_get_prop_val(md, node2, "pid", &pid))
160 				md_get_prop_val(md, node2, "id", &pid);
161 
162 			cpu = pri_find_cpu(pid);
163 			if (cpu == NULL)
164 				errx(1, "couldn't determine core for VCPU %lld\n", pid);
165 			cpu->core = core;
166 		}
167 	}
168 }
169 
170 void
171 pri_add_core(struct md *md, struct md_node *node)
172 {
173 	struct core *core;
174 
175 	core = xzalloc(sizeof(*core));
176 	TAILQ_INSERT_TAIL(&cores, core, link);
177 
178 	pri_link_core(md, node, core);
179 }
180 
181 void
182 pri_init_cores(struct md *md)
183 {
184 	struct md_node *node;
185 	const void *type;
186 	size_t len;
187 
188 	TAILQ_INIT(&cores);
189 
190 	TAILQ_FOREACH(node, &md->node_list, link) {
191 		if (strcmp(node->name->str, "tlb") == 0 &&
192 		    md_get_prop_data(md, node, "type", &type, &len) &&
193 		    strcmp(type, "data") == 0) {
194 			pri_add_core(md, node);
195 		}
196 	}
197 }
198 
199 void
200 pri_add_hostbridge(struct md *md, struct md_node *node)
201 {
202 	struct hostbridge *hostbridge;
203 
204 	hostbridge = xzalloc(sizeof(*hostbridge));
205 	md_get_prop_str(md, node, "path", &hostbridge->path);
206 	md_get_prop_val(md, node, "#msi-eqs", &hostbridge->num_msi_eqs);
207 	md_get_prop_val(md, node, "#msi", &hostbridge->num_msis);
208 	if (!md_get_prop_val(md, node, "#max-vpcis", &hostbridge->max_vpcis))
209 		hostbridge->max_vpcis = 10;
210 	TAILQ_INSERT_TAIL(&hostbridges, hostbridge, link);
211 }
212 
213 void
214 pri_init_components(struct md *md)
215 {
216 	struct component *component;
217 	struct md_node *node;
218 	const char *path;
219 	const char *type;
220 
221 	TAILQ_INIT(&components);
222 	TAILQ_INIT(&hostbridges);
223 
224 	TAILQ_FOREACH(node, &md->node_list, link) {
225 		if (strcmp(node->name->str, "component") != 0)
226 			continue;
227 
228 		if (md_get_prop_str(md, node, "assignable-path", &path)) {
229 			component = xzalloc(sizeof(*component));
230 			component->path = path;
231 			TAILQ_INSERT_TAIL(&components, component, link);
232 		}
233 
234 		if (md_get_prop_str(md, node, "type", &type) &&
235 		    strcmp(type, "hostbridge") == 0)
236 			pri_add_hostbridge(md, node);
237 	}
238 }
239 
240 void
241 pri_init_phys_io(struct md *md)
242 {
243 	struct md_node *node;
244 	const char *device_type;
245 	uint64_t cfg_handle;
246 	struct rootcomplex *rootcomplex;
247 	char *path;
248 	size_t len;
249 
250 	TAILQ_INIT(&rootcomplexes);
251 
252 	TAILQ_FOREACH(node, &md->node_list, link) {
253 		if (strcmp(node->name->str, "iodevice") == 0 &&
254 		    md_get_prop_str(md, node, "device-type", &device_type) &&
255 		    strcmp(device_type, "pciex") == 0) {
256 			if (!md_get_prop_val(md, node, "cfg-handle",
257 					     &cfg_handle))
258 				continue;
259 
260 			rootcomplex = xzalloc(sizeof(*rootcomplex));
261 			md_get_prop_val(md, node, "#msi-eqs",
262 			    &rootcomplex->num_msi_eqs);
263 			md_get_prop_val(md, node, "#msi",
264 			    &rootcomplex->num_msis);
265 			md_get_prop_data(md, node, "msi-ranges",
266 			    &rootcomplex->msi_ranges, &len);
267 			rootcomplex->num_msi_ranges =
268 			    len / (2 * sizeof(uint64_t));
269 			md_get_prop_data(md, node, "virtual-dma",
270 			    &rootcomplex->vdma_ranges, &len);
271 			rootcomplex->num_vdma_ranges =
272 			    len / (2 * sizeof(uint64_t));
273 			rootcomplex->cfghandle = cfg_handle;
274 			xasprintf(&path, "/@%llx", cfg_handle);
275 			rootcomplex->path = path;
276 			TAILQ_INSERT_TAIL(&rootcomplexes, rootcomplex, link);
277 		}
278 	}
279 }
280 
281 void
282 pri_add_cpu(struct md *md, struct md_node *node)
283 {
284 	struct cpu *cpu;
285 	uint64_t mmu_page_size_list;
286 	uint64_t page_size;
287 
288 	cpu = xzalloc(sizeof(*cpu));
289 	/*
290 	 * Only UltraSPARC T1 CPUs have a "pid" property.  All other
291 	 * just have a "id" property that can be used as the physical ID.
292 	 */
293 	if (!md_get_prop_val(md, node, "pid", &cpu->pid))
294 		md_get_prop_val(md, node, "id", &cpu->pid);
295 	cpu->vid = -1;
296 	cpu->gid = -1;
297 	cpu->partid = -1;
298 	cpu->resource_id = -1;
299 	TAILQ_INSERT_TAIL(&free_cpus, cpu, link);
300 	total_cpus++;
301 
302 	mmu_page_size_list = 0x9;
303 	md_get_prop_val(md, node, "mmu-page-size-list", &mmu_page_size_list);
304 
305 	page_size = 1024;
306 	while (mmu_page_size_list) {
307 		page_size *= 8;
308 		mmu_page_size_list >>= 1;
309 	}
310 
311 	if (page_size > max_page_size)
312 		max_page_size = page_size;
313 }
314 
315 struct cpu *
316 pri_alloc_cpu(uint64_t pid)
317 {
318 	struct cpu *cpu;
319 
320 	if (pid == -1 && !TAILQ_EMPTY(&free_cpus)) {
321 		cpu = TAILQ_FIRST(&free_cpus);
322 		TAILQ_REMOVE(&free_cpus, cpu, link);
323 		return cpu;
324 	}
325 
326 	TAILQ_FOREACH(cpu, &free_cpus, link) {
327 		if (cpu->pid == pid) {
328 			TAILQ_REMOVE(&free_cpus, cpu, link);
329 			return cpu;
330 		}
331 	}
332 
333 	return NULL;
334 }
335 
336 void
337 pri_free_cpu(struct cpu *cpu)
338 {
339 	TAILQ_INSERT_TAIL(&free_cpus, cpu, link);
340 }
341 
342 void
343 pri_add_mblock(struct md *md, struct md_node *node)
344 {
345 	struct mblock *mblock;
346 
347 	mblock = xzalloc(sizeof(*mblock));
348 	md_get_prop_val(md, node, "base", &mblock->membase);
349 	md_get_prop_val(md, node, "size", &mblock->memsize);
350 	mblock->resource_id = -1;
351 	TAILQ_INSERT_TAIL(&free_memory, mblock, link);
352 	total_memory += mblock->memsize;
353 }
354 
355 struct mblock *
356 pri_alloc_memory(uint64_t base, uint64_t size)
357 {
358 	struct mblock *mblock, *new_mblock;
359 	uint64_t memend;
360 
361 	if (base == -1 && !TAILQ_EMPTY(&free_memory)) {
362 		mblock = TAILQ_FIRST(&free_memory);
363 		base = mblock->membase;
364 	}
365 
366 	TAILQ_FOREACH(mblock, &free_memory, link) {
367 		if (base >= mblock->membase &&
368 		    base < mblock->membase + mblock->memsize) {
369 			if (base > mblock->membase) {
370 				new_mblock = xzalloc(sizeof(*new_mblock));
371 				new_mblock->membase = mblock->membase;
372 				new_mblock->memsize = base - mblock->membase;
373 				new_mblock->resource_id = -1;
374 				TAILQ_INSERT_BEFORE(mblock, new_mblock, link);
375 			}
376 
377 			memend = mblock->membase + mblock->memsize;
378 			mblock->membase = base + size;
379 			mblock->memsize = memend - mblock->membase;
380 			if (mblock->memsize == 0) {
381 				TAILQ_REMOVE(&free_memory, mblock, link);
382 				free(mblock);
383 			}
384 
385 			total_memory -= size;
386 
387 			new_mblock = xzalloc(sizeof(*new_mblock));
388 			new_mblock->membase = base;
389 			new_mblock->memsize = size;
390 			new_mblock->resource_id = -1;
391 			return new_mblock;
392 		}
393 	}
394 
395 	return NULL;
396 }
397 
/*
 * Delete the "devalias" node that sits under the "openboot" node of md.
 *
 * The "openboot" node must exist (asserted); a missing "devalias"
 * subnode is not an error.
 */
void
pri_delete_devalias(struct md *md)
{
	struct md_node *node;

	/*
	 * There may be multiple "devalias" nodes.  Only remove the one
	 * that resides under the "openboot" node.
	 */
	node = md_find_node(md, "openboot");
	assert(node);
	node = md_find_subnode(md, node, "devalias");
	if (node)
		md_delete_node(md, node);
}
413 
414 void
415 pri_init(struct md *md)
416 {
417 	struct md_node *node, *node2;
418 	struct md_prop *prop;
419 	uint64_t base, size;
420 	uint64_t offset, guest_use;
421 
422 	node = md_find_node(pri, "platform");
423 	if (node == NULL)
424 		errx(1, "platform node not found");
425 
426 	md_get_prop_val(md, node, "max-cpus", &max_cpus);
427 
428 	node = md_find_node(pri, "firmware");
429 	if (node == NULL)
430 		errx(1, "firmware node not found");
431 
432 	md_get_prop_val(md, node, "max_guests", &max_guests);
433 	md_get_prop_val(md, node, "max_hv_ldcs", &max_hv_ldcs);
434 	md_get_prop_val(md, node, "max_guest_ldcs", &max_guest_ldcs);
435 	md_get_prop_val(md, node, "md_elbow_room", &md_elbow_room);
436 	md_get_prop_val(md, node, "max_mblocks", &max_mblocks);
437 	md_get_prop_val(md, node, "directio_capability", &directio_capability);
438 
439 	node = md_find_node(md, "read_only_memory");
440 	if (node == NULL)
441 		errx(1, "read_only_memory node not found");
442 	if (!md_get_prop_val(md, node, "base", &base))
443 		errx(1, "missing base property in read_only_memory node");
444 	if (!md_get_prop_val(md, node, "size", &size))
445 		errx(1, "missing size property in read_only_memory node");
446 	TAILQ_FOREACH(prop, &node->prop_list, link) {
447 		if (prop->tag == MD_PROP_ARC &&
448 		    strcmp(prop->name->str, "fwd") == 0) {
449 			node2 = prop->d.arc.node;
450 			if (!md_get_prop_val(md, node2, "guest_use",
451 			    &guest_use) || guest_use == 0)
452 				continue;
453 			if (!md_get_prop_val(md, node2, "offset", &offset) ||
454 			    !md_get_prop_val(md, node2, "size", &size))
455 				continue;
456 			rombase = base + offset;
457 			romsize = size;
458 		}
459 	}
460 	if (romsize == 0)
461 		errx(1, "no suitable firmware image found");
462 
463 	node = md_find_node(md, "platform");
464 	assert(node);
465 	md_set_prop_val(md, node, "domaining-enabled", 0x1);
466 
467 	md_write(md, "pri");
468 
469 	protomd = md_copy(md);
470 	md_find_delete_node(protomd, "components");
471 	md_find_delete_node(protomd, "domain-services");
472 	md_find_delete_node(protomd, "channel-devices");
473 	md_find_delete_node(protomd, "channel-endpoints");
474 	md_find_delete_node(protomd, "firmware");
475 	md_find_delete_node(protomd, "ldc_endpoints");
476 	md_find_delete_node(protomd, "memory-segments");
477 	pri_delete_devalias(protomd);
478 	md_collect_garbage(protomd);
479 	md_write(protomd, "protomd");
480 
481 	guests = xzalloc(max_guests * sizeof(*guests));
482 	consoles = xzalloc(max_guests * sizeof(*consoles));
483 	cpus = xzalloc(max_cpus * sizeof(*cpus));
484 	pcie_busses = xzalloc(max_devices * sizeof(*pcie_busses));
485 	network_devices = xzalloc(max_devices * sizeof(*network_devices));
486 	mblocks = xzalloc(max_mblocks * sizeof(*mblocks));
487 	ldc_endpoints = xzalloc(max_guest_ldcs * sizeof(*ldc_endpoints));
488 
489 	node = md_find_node(md, "cpus");
490 	TAILQ_FOREACH(prop, &node->prop_list, link) {
491 		if (prop->tag == MD_PROP_ARC &&
492 		    strcmp(prop->name->str, "fwd") == 0)
493 			pri_add_cpu(md, prop->d.arc.node);
494 	}
495 
496 	node = md_find_node(md, "memory");
497 	TAILQ_FOREACH(prop, &node->prop_list, link) {
498 		if (prop->tag == MD_PROP_ARC &&
499 		    strcmp(prop->name->str, "fwd") == 0)
500 			pri_add_mblock(md, prop->d.arc.node);
501 	}
502 
503 	pri_init_cores(md);
504 	pri_init_components(md);
505 	pri_init_phys_io(md);
506 }
507 
508 void
509 hvmd_fixup_guest(struct md *md, struct md_node *guest, struct md_node *node)
510 {
511 	struct md_prop *prop;
512 
513 	TAILQ_FOREACH(prop, &guest->prop_list, link) {
514 		if (prop->tag == MD_PROP_ARC &&
515 		    strcmp(prop->name->str, "fwd") == 0) {
516 			if (prop->d.arc.node == node)
517 				return;
518 		}
519 	}
520 
521 	md_add_prop_arc(md, guest, "fwd", node);
522 }
523 
/*
 * Fragment size: hvmd_init() reads it from the "frag_space" node and
 * falls back to md_maxsize when the value is zero.
 */
uint64_t fragsize;
/* One entry per child of "frag_space", appended by hvmd_init_frag(). */
TAILQ_HEAD(, mblock) frag_mblocks;
/* Allocated by hvmd_init() when the MD has an "hvmd_mblock" node. */
struct mblock *hvmd_mblock;
527 
528 void
529 hvmd_init_frag(struct md *md, struct md_node *node)
530 {
531 	struct frag *frag;
532 	struct mblock *mblock;
533 	uint64_t base, size;
534 
535 	md_get_prop_val(md, node, "base", &base);
536 	md_get_prop_val(md, node, "size", &size);
537 
538 	pri_alloc_memory(base, size);
539 
540 	mblock = xzalloc(sizeof(*mblock));
541 	mblock->membase = base;
542 	mblock->memsize = size;
543 	TAILQ_INSERT_TAIL(&frag_mblocks, mblock, link);
544 
545 	while (size > fragsize) {
546 		frag = xmalloc(sizeof(*frag));
547 		frag->base = base;
548 		TAILQ_INSERT_TAIL(&free_frags, frag, link);
549 		base += fragsize;
550 		size -= fragsize;
551 	}
552 }
553 
554 uint64_t
555 hvmd_alloc_frag(uint64_t base)
556 {
557 	struct frag *frag = TAILQ_FIRST(&free_frags);
558 
559 	if (base != -1) {
560 		TAILQ_FOREACH(frag, &free_frags, link) {
561 			if (frag->base == base)
562 				break;
563 		}
564 	}
565 
566 	if (frag == NULL)
567 		return -1;
568 
569 	TAILQ_REMOVE(&free_frags, frag, link);
570 	base = frag->base;
571 	free(frag);
572 
573 	return base;
574 }
575 
576 void
577 hvmd_free_frag(uint64_t base)
578 {
579 	struct frag *frag;
580 
581 	frag = xmalloc(sizeof(*frag));
582 	frag->base = base;
583 	TAILQ_INSERT_TAIL(&free_frags, frag, link);
584 }
585 
586 void
587 hvmd_init_mblock(struct md *md, struct md_node *node)
588 {
589 	struct mblock *mblock;
590 	uint64_t resource_id;
591 	struct md_node *node2;
592 	struct md_prop *prop;
593 
594 	if (!md_get_prop_val(md, node, "resource_id", &resource_id))
595 		errx(1, "missing resource_id property in mblock node");
596 
597 	if (resource_id >= max_mblocks)
598 		errx(1, "resource_id larger than max_mblocks");
599 
600 	mblock = xzalloc(sizeof(*mblock));
601 	md_get_prop_val(md, node, "membase", &mblock->membase);
602 	md_get_prop_val(md, node, "memsize", &mblock->memsize);
603 	md_get_prop_val(md, node, "realbase", &mblock->realbase);
604 	mblock->resource_id = resource_id;
605 	mblocks[resource_id] = mblock;
606 	mblock->hv_node = node;
607 
608 	/* Fixup missing links. */
609 	TAILQ_FOREACH(prop, &node->prop_list, link) {
610 		if (prop->tag == MD_PROP_ARC &&
611 		    strcmp(prop->name->str, "back") == 0) {
612 			node2 = prop->d.arc.node;
613 			if (strcmp(node2->name->str, "guest") == 0)
614 				hvmd_fixup_guest(md, node2, node);
615 		}
616 	}
617 }
618 
619 void
620 hvmd_init_console(struct md *md, struct md_node *node)
621 {
622 	struct console *console;
623 	uint64_t resource_id;
624 
625 	if (!md_get_prop_val(md, node, "resource_id", &resource_id))
626 		errx(1, "missing resource_id property in console node");
627 
628 	if (resource_id >= max_guests)
629 		errx(1, "resource_id larger than max_guests");
630 
631 	console = xzalloc(sizeof(*console));
632 	md_get_prop_val(md, node, "ino", &console->ino);
633 	md_get_prop_val(md, node, "uartbase", &console->uartbase);
634 	console->resource_id = resource_id;
635 	consoles[resource_id] = console;
636 	console->hv_node = node;
637 }
638 
639 void
640 hvmd_init_cpu(struct md *md, struct md_node *node)
641 {
642 	struct cpu *cpu;
643 	uint64_t pid;
644 	uint64_t resource_id;
645 	struct md_node *node2;
646 	struct md_prop *prop;
647 
648 	if (!md_get_prop_val(md, node, "resource_id", &resource_id))
649 		errx(1, "missing resource_id property in cpu node");
650 
651 	if (resource_id >= max_cpus)
652 		errx(1, "resource_id larger than max-cpus");
653 
654 	if (!md_get_prop_val(md, node, "pid", &pid))
655 		errx(1, "missing pid property in cpu node");
656 
657 	cpu = pri_alloc_cpu(pid);
658 	md_get_prop_val(md, node, "vid", &cpu->vid);
659 	if (!md_get_prop_val(md, node, "gid", &cpu->gid))
660 		cpu->gid = 0;
661 	md_get_prop_val(md, node, "partid", &cpu->partid);
662 	cpu->resource_id = resource_id;
663 	cpus[resource_id] = cpu;
664 	cpu->hv_node = node;
665 
666 	/* Fixup missing links. */
667 	TAILQ_FOREACH(prop, &node->prop_list, link) {
668 		if (prop->tag == MD_PROP_ARC &&
669 		    strcmp(prop->name->str, "back") == 0) {
670 			node2 = prop->d.arc.node;
671 			if (strcmp(node2->name->str, "guest") == 0)
672 				hvmd_fixup_guest(md, node2, node);
673 		}
674 	}
675 }
676 
677 void
678 hvmd_init_device(struct md *md, struct md_node *node)
679 {
680 	struct hostbridge *hostbridge;
681 	struct device *device;
682 	uint64_t resource_id;
683 	struct md_node *node2;
684 	struct md_prop *prop;
685 	char *path;
686 
687 	if (strcmp(node->name->str, "pcie_bus") != 0 &&
688 	    strcmp(node->name->str, "network_device") != 0)
689 		return;
690 
691 	if (!md_get_prop_val(md, node, "resource_id", &resource_id))
692 		errx(1, "missing resource_id property in ldc_endpoint node");
693 
694 	if (resource_id >= max_devices)
695 		errx(1, "resource_id larger than max_devices");
696 
697 	device = xzalloc(sizeof(*device));
698 	md_get_prop_val(md, node, "gid", &device->gid);
699 	md_get_prop_val(md, node, "cfghandle", &device->cfghandle);
700 	md_get_prop_val(md, node, "rcid", &device->rcid);
701 	device->resource_id = resource_id;
702 	if (strcmp(node->name->str, "pcie_bus") == 0)
703 		pcie_busses[resource_id] = device;
704 	else
705 		network_devices[resource_id] = device;
706 	device->hv_node = node;
707 
708 	/* Fixup missing links. */
709 	TAILQ_FOREACH(prop, &node->prop_list, link) {
710 		if (prop->tag == MD_PROP_ARC &&
711 		    strcmp(prop->name->str, "back") == 0) {
712 			node2 = prop->d.arc.node;
713 			if (strcmp(node2->name->str, "guest") == 0)
714 				hvmd_fixup_guest(md, node2, node);
715 		}
716 	}
717 
718 	xasprintf(&path, "/@%llx", device->cfghandle);
719 	TAILQ_FOREACH(hostbridge, &hostbridges, link) {
720 		if (strcmp(hostbridge->path, path) == 0)
721 			break;
722 	}
723 	free(path);
724 	if (hostbridge == NULL)
725 		return;
726 
727 	device->msi_eqs_per_vpci =
728 	    hostbridge->num_msi_eqs / hostbridge->max_vpcis;
729 	device->msis_per_vpci =
730 	    hostbridge->num_msis / hostbridge->max_vpcis;
731 	device->msi_base = hostbridge->num_msis;
732 
733 	device->num_msi_eqs = device->msi_eqs_per_vpci +
734 	    hostbridge->num_msi_eqs % hostbridge->max_vpcis;
735 	device->num_msis = device->msis_per_vpci +
736 	    hostbridge->num_msis % hostbridge->max_vpcis;
737 	device->msi_ranges[0] = 0;
738 	device->msi_ranges[1] = device->num_msis;
739 }
740 
741 void
742 hvmd_init_endpoint(struct md *md, struct md_node *node)
743 {
744 	struct ldc_endpoint *endpoint;
745 	uint64_t resource_id;
746 
747 	if (!md_get_prop_val(md, node, "resource_id", &resource_id))
748 		errx(1, "missing resource_id property in ldc_endpoint node");
749 
750 	if (resource_id >= max_guest_ldcs)
751 		errx(1, "resource_id larger than max_guest_ldcs");
752 
753 	if (ldc_endpoints[resource_id]) {
754 		/*
755 		 * Some machine descriptions seem to have duplicate
756 		 * arcs.  Fortunately, these can be easily detected
757 		 * and ignored.
758 		 */
759 		if (ldc_endpoints[resource_id]->hv_node == node)
760 			return;
761 		errx(1, "duplicate resource_id");
762 	}
763 
764 	endpoint = xzalloc(sizeof(*endpoint));
765 	endpoint->target_guest = -1;
766 	endpoint->tx_ino = -1;
767 	endpoint->rx_ino = -1;
768 	endpoint->private_svc = -1;
769 	endpoint->svc_id = -1;
770 	md_get_prop_val(md, node, "target_type", &endpoint->target_type);
771 	md_get_prop_val(md, node, "target_guest", &endpoint->target_guest);
772 	md_get_prop_val(md, node, "channel", &endpoint->channel);
773 	md_get_prop_val(md, node, "target_channel", &endpoint->target_channel);
774 	md_get_prop_val(md, node, "tx-ino", &endpoint->tx_ino);
775 	md_get_prop_val(md, node, "rx-ino", &endpoint->rx_ino);
776 	md_get_prop_val(md, node, "private_svc", &endpoint->private_svc);
777 	md_get_prop_val(md, node, "svc_id", &endpoint->svc_id);
778 	endpoint->resource_id = resource_id;
779 	ldc_endpoints[resource_id] = endpoint;
780 	endpoint->hv_node = node;
781 }
782 
783 void
784 hvmd_init_guest(struct md *md, struct md_node *node)
785 {
786 	struct guest *guest;
787 	struct md_node *node2;
788 	struct md_prop *prop;
789 	uint64_t resource_id;
790 	struct ldc_endpoint *endpoint;
791 	char *path;
792 
793 	if (!md_get_prop_val(md, node, "resource_id", &resource_id))
794 		errx(1, "missing resource_id property in guest node");
795 
796 	if (resource_id >= max_guests)
797 		errx(1, "resource_id larger than max_guests");
798 
799 	guest = xzalloc(sizeof(*guest));
800 	TAILQ_INIT(&guest->cpu_list);
801 	TAILQ_INIT(&guest->device_list);
802 	TAILQ_INIT(&guest->subdevice_list);
803 	TAILQ_INIT(&guest->mblock_list);
804 	TAILQ_INIT(&guest->endpoint_list);
805 	md_get_prop_str(md, node, "name", &guest->name);
806 	md_get_prop_val(md, node, "gid", &guest->gid);
807 	md_get_prop_val(md, node, "pid", &guest->pid);
808 	md_get_prop_val(md, node, "tod-offset", &guest->tod_offset);
809 	md_get_prop_val(md, node, "perfctraccess", &guest->perfctraccess);
810 	md_get_prop_val(md, node, "perfctrhtaccess", &guest->perfctrhtaccess);
811 	md_get_prop_val(md, node, "rngctlaccessible", &guest->rngctlaccessible);
812 	md_get_prop_val(md, node, "mdpa", &guest->mdpa);
813 	guest->resource_id = resource_id;
814 	guests[resource_id] = guest;
815 	guest->hv_node = node;
816 
817 	if (strcmp(guest->name, "primary") == 0 && guest->gid != 0)
818 		errx(1, "gid of primary domain isn't 0");
819 
820 	hvmd_alloc_frag(guest->mdpa);
821 
822 	TAILQ_FOREACH(prop, &node->prop_list, link) {
823 		if (prop->tag == MD_PROP_ARC &&
824 		    strcmp(prop->name->str, "fwd") == 0) {
825 			node2 = prop->d.arc.node;
826 			if (strcmp(node2->name->str, "console") == 0) {
827 				md_get_prop_val(md, node2, "resource_id",
828 				    &resource_id);
829 				guest->console = consoles[resource_id];
830 				consoles[resource_id]->guest = guest;
831 			}
832 			if (strcmp(node2->name->str, "cpu") == 0) {
833 				md_get_prop_val(md, node2, "resource_id",
834 				    &resource_id);
835 				TAILQ_INSERT_TAIL(&guest->cpu_list,
836 				    cpus[resource_id], link);
837 				cpus[resource_id]->guest = guest;
838 			}
839 			if (strcmp(node2->name->str, "pcie_bus") == 0) {
840 				md_get_prop_val(md, node2, "resource_id",
841 				    &resource_id);
842 				TAILQ_INSERT_TAIL(&guest->device_list,
843 				    pcie_busses[resource_id], link);
844 				pcie_busses[resource_id]->guest = guest;
845 			}
846 			if (strcmp(node2->name->str, "network_device") == 0) {
847 				md_get_prop_val(md, node2, "resource_id",
848 				    &resource_id);
849 				TAILQ_INSERT_TAIL(&guest->device_list,
850 				    network_devices[resource_id], link);
851 				network_devices[resource_id]->guest = guest;
852 			}
853 			if (strcmp(node2->name->str, "mblock") == 0) {
854 				md_get_prop_val(md, node2, "resource_id",
855 				    &resource_id);
856 				TAILQ_INSERT_TAIL(&guest->mblock_list,
857 				    mblocks[resource_id], link);
858 				mblocks[resource_id]->guest = guest;
859 			}
860 			if (strcmp(node2->name->str, "ldc_endpoint") == 0) {
861 				md_get_prop_val(md, node2, "resource_id",
862 				    &resource_id);
863 				TAILQ_INSERT_TAIL(&guest->endpoint_list,
864 				    ldc_endpoints[resource_id], link);
865 				ldc_endpoints[resource_id]->guest = guest;
866 			}
867 		}
868 	}
869 
870 	TAILQ_FOREACH(endpoint, &guest->endpoint_list, link) {
871 		if (endpoint->channel >= guest->endpoint_id)
872 			guest->endpoint_id = endpoint->channel + 1;
873 	}
874 
875 	xasprintf(&path, "%s.md", guest->name);
876 	guest->md = md_read(path);
877 
878 	if (guest->md == NULL)
879 		err(1, "unable to get guest MD");
880 
881 	free(path);
882 }
883 
884 void
885 hvmd_init(struct md *md)
886 {
887 	struct md_node *node;
888 	struct md_prop *prop;
889 
890 	node = md_find_node(md, "root");
891 	md_get_prop_val(md, node, "content-version", &content_version);
892 	md_get_prop_val(md, node, "stick-frequency", &stick_frequency);
893 	md_get_prop_val(md, node, "tod-frequency", &tod_frequency);
894 	md_get_prop_val(md, node, "tod", &tod);
895 	md_get_prop_val(md, node, "erpt-pa", &erpt_pa);
896 	md_get_prop_val(md, node, "erpt-size", &erpt_size);
897 	md_get_prop_val(md, node, "uartbase", &uartbase);
898 
899 	node = md_find_node(md, "platform");
900 	if (node)
901 		md_get_prop_val(md, node, "stick-frequency", &stick_frequency);
902 
903 	node = md_find_node(md, "hvmd_mblock");
904 	if (node) {
905 		hvmd_mblock = xzalloc(sizeof(*hvmd_mblock));
906 		md_get_prop_val(md, node, "base", &hvmd_mblock->membase);
907 		md_get_prop_val(md, node, "size", &hvmd_mblock->memsize);
908 		md_get_prop_val(md, node, "md_maxsize", &md_maxsize);
909 		pri_alloc_memory(hvmd_mblock->membase, hvmd_mblock->memsize);
910 	}
911 
912 	node = md_find_node(md, "frag_space");
913 	md_get_prop_val(md, node, "fragsize", &fragsize);
914 	if (fragsize == 0)
915 		fragsize = md_maxsize;
916 	TAILQ_INIT(&frag_mblocks);
917 	TAILQ_FOREACH(prop, &node->prop_list, link) {
918 		if (prop->tag == MD_PROP_ARC &&
919 		    strcmp(prop->name->str, "fwd") == 0)
920 			hvmd_init_frag(md, prop->d.arc.node);
921 	}
922 	pri_alloc_memory(0, fragsize);
923 
924 	node = md_find_node(md, "consoles");
925 	TAILQ_FOREACH(prop, &node->prop_list, link) {
926 		if (prop->tag == MD_PROP_ARC &&
927 		    strcmp(prop->name->str, "fwd") == 0)
928 			hvmd_init_console(md, prop->d.arc.node);
929 	}
930 
931 	node = md_find_node(md, "cpus");
932 	TAILQ_FOREACH(prop, &node->prop_list, link) {
933 		if (prop->tag == MD_PROP_ARC &&
934 		    strcmp(prop->name->str, "fwd") == 0)
935 			hvmd_init_cpu(md, prop->d.arc.node);
936 	}
937 
938 	have_cwqs = (md_find_node(md, "cwqs") != NULL);
939 	have_rngs = (md_find_node(md, "rngs") != NULL);
940 
941 	node = md_find_node(md, "devices");
942 	TAILQ_FOREACH(prop, &node->prop_list, link) {
943 		if (prop->tag == MD_PROP_ARC &&
944 		    strcmp(prop->name->str, "fwd") == 0)
945 			hvmd_init_device(md, prop->d.arc.node);
946 	}
947 
948 	node = md_find_node(md, "memory");
949 	TAILQ_FOREACH(prop, &node->prop_list, link) {
950 		if (prop->tag == MD_PROP_ARC &&
951 		    strcmp(prop->name->str, "fwd") == 0)
952 			hvmd_init_mblock(md, prop->d.arc.node);
953 	}
954 
955 	node = md_find_node(md, "ldc_endpoints");
956 	TAILQ_FOREACH(prop, &node->prop_list, link) {
957 		if (prop->tag == MD_PROP_ARC &&
958 		    strcmp(prop->name->str, "fwd") == 0)
959 			hvmd_init_endpoint(md, prop->d.arc.node);
960 	}
961 
962 	node = md_find_node(md, "guests");
963 	TAILQ_FOREACH(prop, &node->prop_list, link) {
964 		if (prop->tag == MD_PROP_ARC &&
965 		    strcmp(prop->name->str, "fwd") == 0)
966 			hvmd_init_guest(md, prop->d.arc.node);
967 	}
968 
969 	hvmd_alloc_frag(-1);
970 }
971 
972 void
973 hvmd_finalize_cpu(struct md *md, struct cpu *cpu)
974 {
975 	struct md_node *parent;
976 	struct md_node *node;
977 	int i;
978 
979 	for (i = 0; i < MAX_STRANDS_PER_CORE; i++) {
980 		if (cpu->core->guests[i] == cpu->guest) {
981 			cpu->partid = i + 1;
982 			break;
983 		}
984 		if (cpu->core->guests[i] == NULL) {
985 			cpu->core->guests[i] = cpu->guest;
986 			cpu->partid = i + 1;
987 			break;
988 		}
989 	}
990 
991 	parent = md_find_node(md, "cpus");
992 	assert(parent);
993 
994 	node = md_add_node(md, "cpu");
995 	md_link_node(md, parent, node);
996 	md_add_prop_val(md, node, "pid", cpu->pid);
997 	md_add_prop_val(md, node, "vid", cpu->vid);
998 	md_add_prop_val(md, node, "gid", cpu->gid);
999 	md_add_prop_val(md, node, "partid", cpu->partid);
1000 	md_add_prop_val(md, node, "resource_id", cpu->resource_id);
1001 	cpu->hv_node = node;
1002 }
1003 
1004 void
1005 hvmd_finalize_cpus(struct md *md)
1006 {
1007 	struct md_node *parent;
1008 	struct md_node *node;
1009 	uint64_t resource_id;
1010 
1011 	parent = md_find_node(md, "root");
1012 	assert(parent);
1013 
1014 	node = md_add_node(md, "cpus");
1015 	md_link_node(md, parent, node);
1016 
1017 	for (resource_id = 0; resource_id < max_cpus; resource_id++) {
1018 		if (cpus[resource_id])
1019 			hvmd_finalize_cpu(md, cpus[resource_id]);
1020 	}
1021 }
1022 
1023 void
1024 hvmd_finalize_maus(struct md *md)
1025 {
1026 	struct md_node *parent;
1027 	struct md_node *node;
1028 	struct md_node *child;
1029 	int i;
1030 
1031 	parent = md_find_node(md, "root");
1032 	assert(parent);
1033 
1034 	node = md_add_node(md, "maus");
1035 	md_link_node(md, parent, node);
1036 
1037 	if (have_cwqs) {
1038 		node = md_add_node(md, "cwqs");
1039 		md_link_node(md, parent, node);
1040 	}
1041 
1042 	if (have_rngs) {
1043 		node = md_add_node(md, "rngs");
1044 		md_link_node(md, parent, node);
1045 		child = md_add_node(md, "rng");
1046 		md_link_node(md, node, child);
1047 		for (i = 0; i < max_cpus; i++) {
1048 			if (cpus[i])
1049 				md_link_node(md, cpus[i]->hv_node, child);
1050 		}
1051 	}
1052 }
1053 
1054 void
1055 hvmd_finalize_device(struct md *md, struct device *device, const char *name)
1056 {
1057 	struct md_node *parent;
1058 	struct md_node *node;
1059 
1060 	parent = md_find_node(md, "devices");
1061 	assert(parent);
1062 
1063 	node = md_add_node(md, name);
1064 	md_link_node(md, parent, node);
1065 	md_add_prop_val(md, node, "resource_id", device->resource_id);
1066 	md_add_prop_val(md, node, "cfghandle", device->cfghandle);
1067 	md_add_prop_val(md, node, "gid", device->gid);
1068 	md_add_prop_val(md, node, "rcid", device->rcid);
1069 	device->hv_node = node;
1070 }
1071 
1072 void
1073 hvmd_finalize_pcie_device(struct md *md, struct device *device)
1074 {
1075 	struct rootcomplex *rootcomplex;
1076 	struct md_node *node, *child, *parent;
1077 	struct component *component;
1078 	struct subdevice *subdevice;
1079 	uint64_t resource_id = 0;
1080 	char *path;
1081 
1082 	hvmd_finalize_device(md, device,
1083 	    device->virtual ? "virtual_pcie_bus" : "pcie_bus");
1084 	node = device->hv_node;
1085 
1086 	if (!directio_capability)
1087 		return;
1088 
1089 	TAILQ_FOREACH(rootcomplex, &rootcomplexes, link) {
1090 		if (rootcomplex->cfghandle == device->cfghandle)
1091 			break;
1092 	}
1093 	if (rootcomplex == NULL)
1094 		return;
1095 
1096 	md_add_prop_val(md, node, "allow_bypass", 0);
1097 
1098 	md_add_prop_val(md, node, "#msi-eqs", device->num_msi_eqs);
1099 	md_add_prop_val(md, node, "#msi", device->num_msis);
1100 	md_add_prop_data(md, node, "msi-ranges", (void *)device->msi_ranges,
1101 	    sizeof(device->msi_ranges));
1102 	md_add_prop_data(md, node, "virtual-dma", rootcomplex->vdma_ranges,
1103 	    rootcomplex->num_vdma_ranges * 2 * sizeof(uint64_t));
1104 
1105 	xasprintf(&path, "/@%llx", device->cfghandle);
1106 
1107 	if (!device->virtual) {
1108 		parent = md_add_node(md, "pcie_assignable_devices");
1109 		md_link_node(md, node, parent);
1110 
1111 		TAILQ_FOREACH(component, &components, link) {
1112 			const char *path2 = component->path;
1113 
1114 			if (strncmp(path, path2, strlen(path)) != 0)
1115 				continue;
1116 
1117 			path2 = strchr(path2, '/');
1118 			if (path2 == NULL || *path2++ == 0)
1119 				continue;
1120 			path2 = strchr(path2, '/');
1121 			if (path2 == NULL || *path2++ == 0)
1122 				continue;
1123 
1124 			child = md_add_node(md, "pcie_device");
1125 			md_link_node(md, parent, child);
1126 
1127 			md_add_prop_str(md, child, "path", path2);
1128 			md_add_prop_val(md, child, "resource_id", resource_id);
1129 			resource_id++;
1130 
1131 			component->hv_node = child;
1132 		}
1133 	}
1134 
1135 	parent = md_add_node(md, "pcie_assigned_devices");
1136 	md_link_node(md, node, parent);
1137 
1138 	TAILQ_FOREACH(subdevice, &device->guest->subdevice_list, link) {
1139 		TAILQ_FOREACH(component, &components, link) {
1140 			if (strcmp(subdevice->path, component->path) == 0)
1141 				md_link_node(md, parent, component->hv_node);
1142 		}
1143 	}
1144 
1145 	free(path);
1146 }
1147 
1148 void
1149 hvmd_finalize_devices(struct md *md)
1150 {
1151 	struct md_node *parent;
1152 	struct md_node *node;
1153 	uint64_t resource_id;
1154 
1155 	parent = md_find_node(md, "root");
1156 	assert(parent);
1157 
1158 	node = md_add_node(md, "devices");
1159 	md_link_node(md, parent, node);
1160 
1161 	for (resource_id = 0; resource_id < max_devices; resource_id++) {
1162 		if (pcie_busses[resource_id])
1163 			hvmd_finalize_pcie_device(md, pcie_busses[resource_id]);
1164 	}
1165 	for (resource_id = 0; resource_id < max_devices; resource_id++) {
1166 		if (network_devices[resource_id])
1167 			hvmd_finalize_device(md, network_devices[resource_id],
1168 			    "network_device");
1169 	}
1170 }
1171 
1172 void
1173 hvmd_finalize_mblock(struct md *md, struct mblock *mblock)
1174 {
1175 	struct md_node *parent;
1176 	struct md_node *node;
1177 
1178 	parent = md_find_node(md, "memory");
1179 	assert(parent);
1180 
1181 	node = md_add_node(md, "mblock");
1182 	md_link_node(md, parent, node);
1183 	md_add_prop_val(md, node, "membase", mblock->membase);
1184 	md_add_prop_val(md, node, "memsize", mblock->memsize);
1185 	md_add_prop_val(md, node, "realbase", mblock->realbase);
1186 	md_add_prop_val(md, node, "resource_id", mblock->resource_id);
1187 	mblock->hv_node = node;
1188 }
1189 
1190 void
1191 hvmd_finalize_memory(struct md *md)
1192 {
1193 	struct md_node *parent;
1194 	struct md_node *node;
1195 	uint64_t resource_id;
1196 
1197 	parent = md_find_node(md, "root");
1198 	assert(parent);
1199 
1200 	node = md_add_node(md, "memory");
1201 	md_link_node(md, parent, node);
1202 
1203 	for (resource_id = 0; resource_id < max_mblocks; resource_id++) {
1204 		if (mblocks[resource_id])
1205 			hvmd_finalize_mblock(md, mblocks[resource_id]);
1206 	}
1207 }
1208 
1209 void
1210 hvmd_finalize_endpoint(struct md *md, struct ldc_endpoint *endpoint)
1211 {
1212 	struct md_node *parent;
1213 	struct md_node *node;
1214 
1215 	parent = md_find_node(md, "ldc_endpoints");
1216 	assert(parent);
1217 
1218 	node = md_add_node(md, "ldc_endpoint");
1219 	md_link_node(md, parent, node);
1220 	md_add_prop_val(md, node, "resource_id", endpoint->resource_id);
1221 	md_add_prop_val(md, node, "target_type", endpoint->target_type);
1222 	md_add_prop_val(md, node, "channel", endpoint->channel);
1223 	if (endpoint->target_guest != -1)
1224 		md_add_prop_val(md, node, "target_guest",
1225 		    endpoint->target_guest);
1226 	md_add_prop_val(md, node, "target_channel", endpoint->target_channel);
1227 	if (endpoint->tx_ino != -1)
1228 		md_add_prop_val(md, node, "tx-ino", endpoint->tx_ino);
1229 	if (endpoint->rx_ino != -1)
1230 		md_add_prop_val(md, node, "rx-ino", endpoint->rx_ino);
1231 	if (endpoint->private_svc != -1)
1232 		md_add_prop_val(md, node, "private_svc",
1233 		    endpoint->private_svc);
1234 	if (endpoint->svc_id != -1)
1235 		md_add_prop_val(md, node, "svc_id", endpoint->svc_id);
1236 	endpoint->hv_node = node;
1237 }
1238 
1239 void
1240 hvmd_finalize_endpoints(struct md *md)
1241 {
1242 	struct md_node *parent;
1243 	struct md_node *node;
1244 	uint64_t resource_id;
1245 
1246 	parent = md_find_node(md, "root");
1247 	assert(parent);
1248 
1249 	node = md_add_node(md, "ldc_endpoints");
1250 	md_link_node(md, parent, node);
1251 
1252 	for (resource_id = 0; resource_id < max_guest_ldcs; resource_id++) {
1253 		if (ldc_endpoints[resource_id])
1254 			hvmd_finalize_endpoint(md, ldc_endpoints[resource_id]);
1255 	}
1256 }
1257 
1258 void
1259 hvmd_finalize_console(struct md *md, struct console *console)
1260 {
1261 	struct md_node *parent;
1262 	struct md_node *node;
1263 	struct ldc_endpoint *endpoint;
1264 
1265 	parent = md_find_node(md, "consoles");
1266 	assert(parent);
1267 
1268 	node = md_add_node(md, "console");
1269 	md_link_node(md, parent, node);
1270 	md_add_prop_val(md, node, "resource_id", console->resource_id);
1271 	md_add_prop_val(md, node, "ino", console->ino);
1272 	console->hv_node = node;
1273 
1274 	if (console->uartbase) {
1275 		md_add_prop_val(md, node, "uartbase", console->uartbase);
1276 		return;
1277 	}
1278 
1279 	TAILQ_FOREACH(endpoint, &console->guest->endpoint_list, link) {
1280 		if (endpoint->rx_ino == console->ino) {
1281 			md_link_node(md, node, endpoint->hv_node);
1282 			break;
1283 		}
1284 	}
1285 }
1286 
1287 void
1288 hvmd_finalize_consoles(struct md *md)
1289 {
1290 	struct md_node *parent;
1291 	struct md_node *node;
1292 	uint64_t resource_id;
1293 
1294 	parent = md_find_node(md, "root");
1295 	assert(parent);
1296 
1297 	node = md_add_node(md, "consoles");
1298 	md_link_node(md, parent, node);
1299 
1300 	for (resource_id = 0; resource_id < max_guests; resource_id++) {
1301 		if (consoles[resource_id])
1302 			hvmd_finalize_console(md, consoles[resource_id]);
1303 	}
1304 }
1305 
1306 void
1307 hvmd_finalize_guest(struct md *md, struct guest *guest)
1308 {
1309 	struct md_node *node;
1310 	struct md_node *parent;
1311 	struct cpu *cpu;
1312 	struct device *device;
1313 	struct mblock *mblock;
1314 	struct ldc_endpoint *endpoint;
1315 
1316 	parent = md_find_node(md, "guests");
1317 	assert(parent);
1318 
1319 	node = md_add_node(md, "guest");
1320 	md_link_node(md, parent, node);
1321 	md_add_prop_str(md, node, "name", guest->name);
1322 	md_add_prop_val(md, node, "gid", guest->gid);
1323 	md_add_prop_val(md, node, "pid", guest->pid);
1324 	md_add_prop_val(md, node, "resource_id", guest->resource_id);
1325 	md_add_prop_val(md, node, "tod-offset", guest->tod_offset);
1326 	md_add_prop_val(md, node, "reset-reason", 0);
1327 	md_add_prop_val(md, node, "perfctraccess", guest->perfctraccess);
1328 	md_add_prop_val(md, node, "perfctrhtaccess", guest->perfctrhtaccess);
1329 	md_add_prop_val(md, node, "rngctlaccessible", guest->rngctlaccessible);
1330 	md_add_prop_val(md, node, "diagpriv", 0);
1331 	md_add_prop_val(md, node, "mdpa", guest->mdpa);
1332 	md_add_prop_val(md, node, "rombase", rombase);
1333 	md_add_prop_val(md, node, "romsize", romsize);
1334 	md_add_prop_val(md, node, "uartbase", uartbase);
1335 	guest->hv_node = node;
1336 
1337 	node = md_add_node(md, "virtual_devices");
1338 	md_link_node(md, guest->hv_node, node);
1339 	md_add_prop_val(md, node, "cfghandle", 0x100);
1340 
1341 	node = md_add_node(md, "channel_devices");
1342 	md_link_node(md, guest->hv_node, node);
1343 	md_add_prop_val(md, node, "cfghandle", 0x200);
1344 
1345 	if (guest->console)
1346 		md_link_node(md, guest->hv_node, guest->console->hv_node);
1347 	TAILQ_FOREACH(cpu, &guest->cpu_list, link)
1348 		md_link_node(md, guest->hv_node, cpu->hv_node);
1349 	TAILQ_FOREACH(device, &guest->device_list, link)
1350 		md_link_node(md, guest->hv_node, device->hv_node);
1351 	TAILQ_FOREACH(mblock, &guest->mblock_list, link)
1352 		md_link_node(md, guest->hv_node, mblock->hv_node);
1353 	TAILQ_FOREACH(endpoint, &guest->endpoint_list, link)
1354 		md_link_node(md, guest->hv_node, endpoint->hv_node);
1355 }
1356 
1357 void
1358 hvmd_finalize_guests(struct md *md)
1359 {
1360 	struct md_node *parent;
1361 	struct md_node *node;
1362 	uint64_t resource_id;
1363 
1364 	parent = md_find_node(md, "root");
1365 	assert(parent);
1366 
1367 	node = md_add_node(md, "guests");
1368 	md_link_node(md, parent, node);
1369 
1370 	for (resource_id = 0; resource_id < max_guests; resource_id++) {
1371 		if (guests[resource_id])
1372 			hvmd_finalize_guest(md, guests[resource_id]);
1373 	}
1374 }
1375 
1376 void
1377 hvmd_finalize(void)
1378 {
1379 	struct md *md;
1380 	struct md_node *node;
1381 	struct md_node *parent;
1382 	struct mblock *mblock;
1383 
1384 	md = md_alloc();
1385 	node = md_add_node(md, "root");
1386 	md_add_prop_val(md, node, "content-version", content_version);
1387 	if (content_version <= 0x100000000) {
1388 		md_add_prop_val(md, node, "stick-frequency", stick_frequency);
1389 		if (tod_frequency != 0)
1390 			md_add_prop_val(md, node, "tod-frequency",
1391 			    tod_frequency);
1392 		if (tod != 0)
1393 			md_add_prop_val(md, node, "tod", tod);
1394 		if (erpt_pa != 0)
1395 			md_add_prop_val(md, node, "erpt-pa", erpt_pa);
1396 		if (erpt_size != 0)
1397 			md_add_prop_val(md, node, "erpt-size", erpt_size);
1398 
1399 		parent = node;
1400 		node = md_add_node(md, "platform");
1401 		md_link_node(md, parent, node);
1402 		md_add_prop_val(md, node, "stick-frequency", stick_frequency);
1403 	}
1404 
1405 	parent = md_find_node(md, "root");
1406 	assert(parent);
1407 
1408 	node = md_add_node(md, "frag_space");
1409 	md_link_node(md, parent, node);
1410 	md_add_prop_val(md, node, "fragsize", fragsize);
1411 
1412 	parent = md_find_node(md, "frag_space");
1413 	TAILQ_FOREACH(mblock, &frag_mblocks, link) {
1414 		node = md_add_node(md, "frag_mblock");
1415 		md_link_node(md, parent, node);
1416 		md_add_prop_val(md, node, "base", mblock->membase);
1417 		md_add_prop_val(md, node, "size", mblock->memsize);
1418 	}
1419 
1420 	if (hvmd_mblock) {
1421 		parent = md_find_node(md, "root");
1422 		assert(parent);
1423 
1424 		node = md_add_node(md, "hvmd_mblock");
1425 		md_link_node(md, parent, node);
1426 		md_add_prop_val(md, node, "base", hvmd_mblock->membase);
1427 		md_add_prop_val(md, node, "size", hvmd_mblock->memsize);
1428 		md_add_prop_val(md, node, "md_maxsize", md_maxsize);
1429 	}
1430 
1431 	hvmd_finalize_cpus(md);
1432 	hvmd_finalize_maus(md);
1433 	hvmd_finalize_devices(md);
1434 	hvmd_finalize_memory(md);
1435 	hvmd_finalize_endpoints(md);
1436 	hvmd_finalize_consoles(md);
1437 	hvmd_finalize_guests(md);
1438 
1439 	md_write(md, "hv.md");
1440 }
1441 
1442 struct ldc_endpoint *
1443 hvmd_add_endpoint(struct guest *guest)
1444 {
1445 	struct ldc_endpoint *endpoint;
1446 	uint64_t resource_id;
1447 
1448 	for (resource_id = 0; resource_id < max_guest_ldcs; resource_id++)
1449 		if (ldc_endpoints[resource_id] == NULL)
1450 			break;
1451 	assert(resource_id < max_guest_ldcs);
1452 
1453 	endpoint = xzalloc(sizeof(*endpoint));
1454 	endpoint->target_guest = -1;
1455 	endpoint->tx_ino = -1;
1456 	endpoint->rx_ino = -1;
1457 	endpoint->private_svc = -1;
1458 	endpoint->svc_id = -1;
1459 	endpoint->resource_id = resource_id;
1460 	ldc_endpoints[resource_id] = endpoint;
1461 
1462 	TAILQ_INSERT_TAIL(&guest->endpoint_list, endpoint, link);
1463 	endpoint->guest = guest;
1464 
1465 	return endpoint;
1466 }
1467 
1468 struct console *
1469 hvmd_add_console(struct guest *guest)
1470 {
1471 	struct guest *primary;
1472 	struct console *console;
1473 	uint64_t resource_id;
1474 	uint64_t client_channel, server_channel;
1475 
1476 	primary = guest_lookup("primary");
1477 	client_channel = guest->endpoint_id++;
1478 	server_channel = primary->endpoint_id++;
1479 
1480 	for (resource_id = 0; resource_id < max_guests; resource_id++)
1481 		if (consoles[resource_id] == NULL)
1482 			break;
1483 	assert(resource_id < max_guests);
1484 
1485 	console = xzalloc(sizeof(*console));
1486 	console->ino = 0x11;
1487 	console->resource_id = resource_id;
1488 	consoles[resource_id] = console;
1489 
1490 	console->client_endpoint = hvmd_add_endpoint(guest);
1491 	console->client_endpoint->tx_ino = 0x11;
1492 	console->client_endpoint->rx_ino = 0x11;
1493 	console->client_endpoint->target_type = LDC_GUEST;
1494 	console->client_endpoint->target_guest = primary->gid;
1495 	console->client_endpoint->target_channel = server_channel;
1496 	console->client_endpoint->channel = client_channel;
1497 	console->client_endpoint->private_svc = LDC_CONSOLE_SVC;
1498 
1499 	console->server_endpoint = hvmd_add_endpoint(primary);
1500 	console->server_endpoint->tx_ino = 2 * server_channel;
1501 	console->server_endpoint->rx_ino = 2 * server_channel + 1;
1502 	console->server_endpoint->target_type = LDC_GUEST;
1503 	console->server_endpoint->target_guest = guest->gid;
1504 	console->server_endpoint->channel = server_channel;
1505 	console->server_endpoint->target_channel = client_channel;
1506 
1507 	guest->console = console;
1508 	console->guest = guest;
1509 
1510 	return console;
1511 }
1512 
1513 void
1514 hvmd_add_domain_services(struct guest *guest)
1515 {
1516 	struct guest *primary;
1517 	struct ldc_channel *ds = &guest->domain_services;
1518 	uint64_t client_channel, server_channel;
1519 
1520 	primary = guest_lookup("primary");
1521 	client_channel = guest->endpoint_id++;
1522 	server_channel = primary->endpoint_id++;
1523 
1524 	ds->client_endpoint = hvmd_add_endpoint(guest);
1525 	ds->client_endpoint->tx_ino = 2 * client_channel;
1526 	ds->client_endpoint->rx_ino = 2 * client_channel + 1;
1527 	ds->client_endpoint->target_type = LDC_GUEST;
1528 	ds->client_endpoint->target_guest = primary->gid;
1529 	ds->client_endpoint->target_channel = server_channel;
1530 	ds->client_endpoint->channel = client_channel;
1531 
1532 	ds->server_endpoint = hvmd_add_endpoint(primary);
1533 	ds->server_endpoint->tx_ino = 2 * server_channel;
1534 	ds->server_endpoint->rx_ino = 2 * server_channel + 1;
1535 	ds->server_endpoint->target_type = LDC_GUEST;
1536 	ds->server_endpoint->target_guest = guest->gid;
1537 	ds->server_endpoint->channel = server_channel;
1538 	ds->server_endpoint->target_channel = client_channel;
1539 }
1540 
1541 struct ldc_channel *
1542 hvmd_add_vio(struct guest *guest)
1543 {
1544 	struct guest *primary;
1545 	struct ldc_channel *lc = &guest->vio[guest->num_vios++];
1546 	uint64_t client_channel, server_channel;
1547 
1548 	primary = guest_lookup("primary");
1549 	client_channel = guest->endpoint_id++;
1550 	server_channel = primary->endpoint_id++;
1551 
1552 	lc->client_endpoint = hvmd_add_endpoint(guest);
1553 	lc->client_endpoint->tx_ino = 2 * client_channel;
1554 	lc->client_endpoint->rx_ino = 2 * client_channel + 1;
1555 	lc->client_endpoint->target_type = LDC_GUEST;
1556 	lc->client_endpoint->target_guest = primary->gid;
1557 	lc->client_endpoint->target_channel = server_channel;
1558 	lc->client_endpoint->channel = client_channel;
1559 
1560 	lc->server_endpoint = hvmd_add_endpoint(primary);
1561 	lc->server_endpoint->tx_ino = 2 * server_channel;
1562 	lc->server_endpoint->rx_ino = 2 * server_channel + 1;
1563 	lc->server_endpoint->target_type = LDC_GUEST;
1564 	lc->server_endpoint->target_guest = guest->gid;
1565 	lc->server_endpoint->channel = server_channel;
1566 	lc->server_endpoint->target_channel = client_channel;
1567 
1568 	return lc;
1569 }
1570 
1571 struct guest *
1572 hvmd_add_guest(const char *name)
1573 {
1574 	struct guest *guest;
1575 	uint64_t resource_id;
1576 
1577 	for (resource_id = 0; resource_id < max_guests; resource_id++)
1578 		if (guests[resource_id] == NULL)
1579 			break;
1580 	assert(resource_id < max_guests);
1581 
1582 	guest = xzalloc(sizeof(*guest));
1583 	TAILQ_INIT(&guest->cpu_list);
1584 	TAILQ_INIT(&guest->device_list);
1585 	TAILQ_INIT(&guest->subdevice_list);
1586 	TAILQ_INIT(&guest->mblock_list);
1587 	TAILQ_INIT(&guest->endpoint_list);
1588 	guests[resource_id] = guest;
1589 	guest->name = name;
1590 	guest->gid = resource_id;
1591 	guest->pid = resource_id + 1;
1592 	guest->resource_id = resource_id;
1593 	guest->mdpa = hvmd_alloc_frag(-1);
1594 
1595 	hvmd_add_console(guest);
1596 	hvmd_add_domain_services(guest);
1597 
1598 	return guest;
1599 }
1600 
1601 struct md_node *
1602 guest_add_channel_endpoints(struct guest *guest)
1603 {
1604 	struct md *md = guest->md;
1605 	struct md_node *parent;
1606 	struct md_node *node;
1607 
1608 	parent = md_find_node(md, "root");
1609 	assert(parent);
1610 
1611 	node = md_add_node(md, "channel-endpoints");
1612 	md_link_node(md, parent, node);
1613 
1614 	return node;
1615 }
1616 
1617 struct md_node *
1618 guest_add_endpoint(struct guest *guest, uint64_t id)
1619 {
1620 	struct md *md = guest->md;
1621 	struct md_node *parent;
1622 	struct md_node *node;
1623 
1624 	parent = md_find_node(md, "channel-endpoints");
1625 	if (parent == NULL)
1626 		parent = guest_add_channel_endpoints(guest);
1627 
1628 	node = md_add_node(md, "channel-endpoint");
1629 	md_link_node(md, parent, node);
1630 	md_add_prop_val(md, node, "id", id);
1631 	md_add_prop_val(md, node, "tx-ino", 2 * id);
1632 	md_add_prop_val(md, node, "rx-ino", 2 * id + 1);
1633 
1634 	return node;
1635 }
1636 
1637 struct md_node *
1638 guest_add_vcc(struct guest *guest)
1639 {
1640 	const char compatible[] = "SUNW,sun4v-virtual-console-concentrator";
1641 	struct md *md = guest->md;
1642 	struct md_node *parent;
1643 	struct md_node *node;
1644 
1645 	parent = md_find_node(md, "channel-devices");
1646 	assert(parent != NULL);
1647 
1648 	node = md_add_node(md, "virtual-device");
1649 	md_link_node(md, parent, node);
1650 	md_add_prop_str(md, node, "name", "virtual-console-concentrator");
1651 	md_add_prop_data(md, node, "compatible", compatible,
1652 	    sizeof(compatible));
1653 	md_add_prop_str(md, node, "device_type", "vcc");
1654 	md_add_prop_val(md, node, "cfg-handle", 0x0);
1655 	md_add_prop_str(md, node, "svc-name", "primary-vcc0");
1656 
1657 	return node;
1658 }
1659 
1660 struct md_node *
1661 guest_find_vcc(struct guest *guest)
1662 {
1663 	struct md *md = guest->md;
1664 	struct md_node *node, *node2;
1665 	struct md_prop *prop;
1666 	const char *name;
1667 
1668 	node = md_find_node(md, "channel-devices");
1669 	assert(node != NULL);
1670 
1671 	TAILQ_FOREACH(prop, &node->prop_list, link) {
1672 		if (prop->tag == MD_PROP_ARC &&
1673 		    strcmp(prop->name->str, "fwd") == 0) {
1674 			node2 = prop->d.arc.node;
1675 			if (!md_get_prop_str(md, node2, "name", &name))
1676 				continue;
1677 			if (strcmp(name, "virtual-console-concentrator") == 0)
1678 				return node2;
1679 		}
1680 	}
1681 
1682 	return NULL;
1683 }
1684 
1685 struct md_node *
1686 guest_add_vcc_port(struct guest *guest, struct md_node *vcc,
1687     const char *domain, uint64_t id, uint64_t channel)
1688 {
1689 	struct md *md = guest->md;
1690 	struct md_node *node;
1691 	struct md_node *child;
1692 
1693 	if (vcc == NULL)
1694 		vcc = guest_find_vcc(guest);
1695 	if (vcc == NULL)
1696 		vcc = guest_add_vcc(guest);
1697 
1698 	node = md_add_node(md, "virtual-device-port");
1699 	md_link_node(md, vcc, node);
1700 	md_add_prop_str(md, node, "name", "vcc-port");
1701 	md_add_prop_val(md, node, "id", id);
1702 	md_add_prop_str(md, node, "vcc-domain-name", domain);
1703 	md_add_prop_str(md, node, "vcc-group-name", domain);
1704 	/* OpenBSD doesn't care about this, but Solaris might. */
1705 	md_add_prop_val(md, node, "vcc-tcp-port", 5000 + id);
1706 
1707 	child = guest_add_endpoint(guest, channel);
1708 	md_link_node(md, node, child);
1709 
1710 	return node;
1711 }
1712 
1713 struct md_node *
1714 guest_add_vds(struct guest *guest)
1715 {
1716 	const char compatible[] = "SUNW,sun4v-disk-server";
1717 	struct md *md = guest->md;
1718 	struct md_node *parent;
1719 	struct md_node *node;
1720 
1721 	parent = md_find_node(md, "channel-devices");
1722 	assert(parent != NULL);
1723 
1724 	node = md_add_node(md, "virtual-device");
1725 	md_link_node(md, parent, node);
1726 	md_add_prop_str(md, node, "name", "virtual-disk-server");
1727 	md_add_prop_data(md, node, "compatible", compatible,
1728 	    sizeof(compatible));
1729 	md_add_prop_str(md, node, "device_type", "vds");
1730 	md_add_prop_val(md, node, "cfg-handle", 0x0);
1731 	md_add_prop_str(md, node, "svc-name", "primary-vds0");
1732 
1733 	return node;
1734 }
1735 
1736 struct md_node *
1737 guest_find_vds(struct guest *guest)
1738 {
1739 	struct md *md = guest->md;
1740 	struct md_node *node, *node2;
1741 	struct md_prop *prop;
1742 	const char *name;
1743 
1744 	node = md_find_node(md, "channel-devices");
1745 	assert(node != NULL);
1746 
1747 	TAILQ_FOREACH(prop, &node->prop_list, link) {
1748 		if (prop->tag == MD_PROP_ARC &&
1749 		    strcmp(prop->name->str, "fwd") == 0) {
1750 			node2 = prop->d.arc.node;
1751 			if (!md_get_prop_str(md, node2, "name", &name))
1752 				continue;
1753 			if (strcmp(name, "virtual-disk-server") == 0)
1754 				return node2;
1755 		}
1756 	}
1757 
1758 	return NULL;
1759 }
1760 
1761 struct md_node *
1762 guest_add_vds_port(struct guest *guest, struct md_node *vds,
1763     const char *path, uint64_t id, uint64_t channel)
1764 {
1765 	struct md *md = guest->md;
1766 	struct md_node *node;
1767 	struct md_node *child;
1768 
1769 	if (vds == NULL)
1770 		vds = guest_find_vds(guest);
1771 	if (vds == NULL)
1772 		vds = guest_add_vds(guest);
1773 
1774 	node = md_add_node(md, "virtual-device-port");
1775 	md_link_node(md, vds, node);
1776 	md_add_prop_str(md, node, "name", "vds-port");
1777 	md_add_prop_val(md, node, "id", id);
1778 	md_add_prop_str(md, node, "vds-block-device", path);
1779 
1780 	child = guest_add_endpoint(guest, channel);
1781 	md_link_node(md, node, child);
1782 
1783 	return node;
1784 }
1785 
1786 struct md_node *
1787 guest_add_vsw(struct guest *guest)
1788 {
1789 	const char compatible[] = "SUNW,sun4v-network-switch";
1790 	struct md *md = guest->md;
1791 	struct md_node *parent;
1792 	struct md_node *node;
1793 
1794 	parent = md_find_node(md, "channel-devices");
1795 	assert(parent != NULL);
1796 
1797 	node = md_add_node(md, "virtual-device");
1798 	md_link_node(md, parent, node);
1799 	md_add_prop_str(md, node, "name", "virtual-network-switch");
1800 	md_add_prop_data(md, node, "compatible", compatible,
1801 	    sizeof(compatible));
1802 	md_add_prop_str(md, node, "device_type", "vsw");
1803 	md_add_prop_val(md, node, "cfg-handle", 0x0);
1804 	md_add_prop_str(md, node, "svc-name", "primary-vsw0");
1805 
1806 	return node;
1807 }
1808 
1809 struct md_node *
1810 guest_find_vsw(struct guest *guest)
1811 {
1812 	struct md *md = guest->md;
1813 	struct md_node *node, *node2;
1814 	struct md_prop *prop;
1815 	const char *name;
1816 
1817 	node = md_find_node(md, "channel-devices");
1818 	assert(node != NULL);
1819 
1820 	TAILQ_FOREACH(prop, &node->prop_list, link) {
1821 		if (prop->tag == MD_PROP_ARC &&
1822 		    strcmp(prop->name->str, "fwd") == 0) {
1823 			node2 = prop->d.arc.node;
1824 			if (!md_get_prop_str(md, node2, "name", &name))
1825 				continue;
1826 			if (strcmp(name, "virtual-network-switch") == 0)
1827 				return node2;
1828 		}
1829 	}
1830 
1831 	return NULL;
1832 }
1833 
1834 struct md_node *
1835 guest_add_vsw_port(struct guest *guest, struct md_node *vds,
1836     uint64_t id, uint64_t channel)
1837 {
1838 	struct md *md = guest->md;
1839 	struct md_node *node;
1840 	struct md_node *child;
1841 	uint64_t mac_addr;
1842 
1843 	if (vds == NULL)
1844 		vds = guest_find_vsw(guest);
1845 	if (vds == NULL)
1846 		vds = guest_add_vsw(guest);
1847 	if (!md_get_prop_val(md, vds, "local-mac-address", &mac_addr)) {
1848 		mac_addr = 0x00144ff80000 + (arc4random() & 0x3ffff);
1849 		md_add_prop_val(md, vds, "local-mac-address", mac_addr);
1850 	}
1851 
1852 	node = md_add_node(md, "virtual-device-port");
1853 	md_link_node(md, vds, node);
1854 	md_add_prop_str(md, node, "name", "vsw-port");
1855 	md_add_prop_val(md, node, "id", id);
1856 
1857 	child = guest_add_endpoint(guest, channel);
1858 	md_link_node(md, node, child);
1859 
1860 	return node;
1861 }
1862 
1863 struct md_node *
1864 guest_add_console_device(struct guest *guest)
1865 {
1866 	const char compatible[] = "SUNW,sun4v-console";
1867 	struct md *md = guest->md;
1868 	struct md_node *parent;
1869 	struct md_node *node;
1870 
1871 	parent = md_find_node(md, "virtual-devices");
1872 	assert(parent);
1873 
1874 	node = md_add_node(md, "virtual-device");
1875 	md_link_node(md, parent, node);
1876 	md_add_prop_str(md, node, "name", "console");
1877 	md_add_prop_str(md, node, "device-type", "serial");
1878 	md_add_prop_val(md, node, "intr", 0x1);
1879 	md_add_prop_val(md, node, "ino", 0x11);
1880 	md_add_prop_val(md, node, "channel#", 0);
1881 	md_add_prop_val(md, node, "cfg-handle", 0x1);
1882 	md_add_prop_data(md, node, "compatible", compatible,
1883 	    sizeof(compatible));
1884 
1885 	return node;
1886 }
1887 
1888 struct md_node *
1889 guest_add_vdc(struct guest *guest, uint64_t cfghandle)
1890 {
1891 	const char compatible[] = "SUNW,sun4v-disk";
1892 	struct md *md = guest->md;
1893 	struct md_node *parent;
1894 	struct md_node *node;
1895 
1896 	parent = md_find_node(md, "channel-devices");
1897 	assert(parent);
1898 
1899 	node = md_add_node(md, "virtual-device");
1900 	md_link_node(md, parent, node);
1901 	md_add_prop_str(md, node, "name", "disk");
1902 	md_add_prop_str(md, node, "device-type", "block");
1903 	md_add_prop_val(md, node, "cfg-handle", cfghandle);
1904 	md_add_prop_data(md, node, "compatible", compatible,
1905 	    sizeof(compatible));
1906 
1907 	return node;
1908 }
1909 
1910 struct md_node *
1911 guest_add_vdc_port(struct guest *guest, struct md_node *vdc,
1912     uint64_t cfghandle, uint64_t id, uint64_t channel)
1913 {
1914 	struct md *md = guest->md;
1915 	struct md_node *node;
1916 	struct md_node *child;
1917 
1918 	if (vdc == NULL)
1919 		vdc = guest_add_vdc(guest, cfghandle);
1920 
1921 	node = md_add_node(md, "virtual-device-port");
1922 	md_link_node(md, vdc, node);
1923 	md_add_prop_str(md, node, "name", "vdc-port");
1924 	md_add_prop_val(md, node, "id", id);
1925 
1926 	child = guest_add_endpoint(guest, channel);
1927 	md_link_node(md, node, child);
1928 
1929 	return node;
1930 }
1931 
1932 struct md_node *
1933 guest_add_vnet(struct guest *guest, uint64_t mac_addr, uint64_t mtu,
1934     uint64_t cfghandle)
1935 {
1936 	const char compatible[] = "SUNW,sun4v-network";
1937 	struct md *md = guest->md;
1938 	struct md_node *parent;
1939 	struct md_node *node;
1940 
1941 	parent = md_find_node(md, "channel-devices");
1942 	assert(parent);
1943 
1944 	node = md_add_node(md, "virtual-device");
1945 	md_link_node(md, parent, node);
1946 	md_add_prop_str(md, node, "name", "network");
1947 	md_add_prop_str(md, node, "device-type", "network");
1948 	md_add_prop_val(md, node, "cfg-handle", cfghandle);
1949 	md_add_prop_data(md, node, "compatible", compatible,
1950 	    sizeof(compatible));
1951 	if (mac_addr == -1)
1952 		mac_addr = 0x00144ff80000 + (arc4random() & 0x3ffff);
1953 	md_add_prop_val(md, node, "local-mac-address", mac_addr);
1954 	md_add_prop_val(md, node, "mtu", mtu);
1955 
1956 	return node;
1957 }
1958 
1959 struct md_node *
1960 guest_add_vnet_port(struct guest *guest, struct md_node *vdc,
1961     uint64_t mac_addr, uint64_t remote_mac_addr, uint64_t mtu, uint64_t cfghandle,
1962     uint64_t id, uint64_t channel)
1963 {
1964 	struct md *md = guest->md;
1965 	struct md_node *node;
1966 	struct md_node *child;
1967 
1968 	if (vdc == NULL)
1969 		vdc = guest_add_vnet(guest, mac_addr, mtu, cfghandle);
1970 
1971 	node = md_add_node(md, "virtual-device-port");
1972 	md_link_node(md, vdc, node);
1973 	md_add_prop_str(md, node, "name", "vnet-port");
1974 	md_add_prop_val(md, node, "id", id);
1975 	md_add_prop_val(md, node, "switch-port", 0);
1976 	md_add_prop_data(md, node, "remote-mac-address",
1977 	    (uint8_t *)&remote_mac_addr, sizeof(remote_mac_addr));
1978 
1979 	child = guest_add_endpoint(guest, channel);
1980 	md_link_node(md, node, child);
1981 
1982 	return node;
1983 }
1984 
1985 struct md_node *
1986 guest_add_channel_devices(struct guest *guest)
1987 {
1988 	const char compatible[] = "SUNW,sun4v-channel-devices";
1989 	struct md *md = guest->md;
1990 	struct md_node *parent;
1991 	struct md_node *node;
1992 
1993 	parent = md_find_node(md, "virtual-devices");
1994 	assert(parent);
1995 
1996 	node = md_add_node(md, "channel-devices");
1997 	md_link_node(md, parent, node);
1998 	md_add_prop_str(md, node, "name", "channel-devices");
1999 	md_add_prop_str(md, node, "device-type", "channel-devices");
2000 	md_add_prop_data(md, node, "compatible", compatible,
2001 	    sizeof(compatible));
2002 	md_add_prop_val(md, node, "cfg-handle", 0x200);
2003 
2004 	return node;
2005 }
2006 
2007 struct md_node *
2008 guest_add_domain_services(struct guest *guest)
2009 {
2010 	struct md *md = guest->md;
2011 	struct md_node *parent;
2012 	struct md_node *node;
2013 
2014 	parent = md_find_node(md, "root");
2015 	assert(parent);
2016 
2017 	node = md_add_node(md, "domain-services");
2018 	md_link_node(md, parent, node);
2019 
2020 	return node;
2021 }
2022 
2023 struct md_node *
2024 guest_add_domain_services_port(struct guest *guest, uint64_t id)
2025 {
2026 	struct md *md = guest->md;
2027 	struct md_node *parent;
2028 	struct md_node *node;
2029 	struct md_node *child;
2030 
2031 	parent = md_find_node(md, "domain-services");
2032 	if (parent == NULL)
2033 		parent = guest_add_domain_services(guest);
2034 
2035 	node = md_add_node(md, "domain-services-port");
2036 	md_link_node(md, parent, node);
2037 	md_add_prop_val(md, node, "id", id);
2038 
2039 	child = guest_add_endpoint(guest,
2040 	    guest->domain_services.client_endpoint->channel);
2041 	md_link_node(md, node, child);
2042 
2043 	return node;
2044 }
2045 
2046 void
2047 guest_add_devalias(struct guest *guest, const char *name, const char *path)
2048 {
2049 	struct md *md = guest->md;
2050 	struct md_node *parent;
2051 	struct md_node *node;
2052 
2053 	parent = md_find_node(md, "openboot");
2054 	assert(parent);
2055 
2056 	node = md_find_subnode(md, parent, "devalias");
2057 	if (node == NULL) {
2058 		node = md_add_node(md, "devalias");
2059 		md_link_node(md, parent, node);
2060 	}
2061 
2062 	md_add_prop_str(md, node, name, path);
2063 }
2064 
2065 void
2066 guest_set_domaining_enabled(struct guest *guest)
2067 {
2068 	struct md *md = guest->md;
2069 	struct md_node *node;
2070 
2071 	node = md_find_node(md, "platform");
2072 	assert(node);
2073 
2074 	md_set_prop_val(md, node, "domaining-enabled", 0x1);
2075 }
2076 
2077 void
2078 guest_set_mac_address(struct guest *guest)
2079 {
2080 	struct md *md = guest->md;
2081 	struct md_node *node;
2082 	uint64_t mac_address;
2083 	uint64_t hostid;
2084 
2085 	node = md_find_node(md, "platform");
2086 	assert(node);
2087 
2088 	mac_address = 0x00144ff80000 + (arc4random() & 0x3ffff);
2089 	md_set_prop_val(md, node, "mac-address", mac_address);
2090 
2091 	hostid = 0x84000000 | (mac_address & 0x00ffffff);
2092 	md_set_prop_val(md, node, "hostid", hostid);
2093 }
2094 
2095 struct md_node *
2096 guest_find_vc(struct guest *guest)
2097 {
2098 	struct md *md = guest->md;
2099 	struct md_node *node, *node2;
2100 	struct md_node *vc = NULL;
2101 	struct md_prop *prop;
2102 	const char *name;
2103 
2104 	node = md_find_node(md, "channel-devices");
2105 	assert(node != NULL);
2106 
2107 	TAILQ_FOREACH(prop, &node->prop_list, link) {
2108 		if (prop->tag == MD_PROP_ARC &&
2109 		    strcmp(prop->name->str, "fwd") == 0) {
2110 			node2 = prop->d.arc.node;
2111 			if (!md_get_prop_str(md, node2, "name", &name))
2112 				continue;
2113 			if (strcmp(name, "virtual-channel") == 0)
2114 				vc = node2;
2115 		}
2116 	}
2117 
2118 	return vc;
2119 }
2120 
2121 struct md_node *
2122 guest_add_vc_port(struct guest *guest, struct md_node *vc,
2123     const char *domain, uint64_t id, uint64_t channel)
2124 {
2125 	struct md *md = guest->md;
2126 	struct md_node *node;
2127 	struct md_node *child;
2128 	char *str;
2129 
2130 	if (vc == NULL)
2131 		vc = guest_find_vc(guest);
2132 	assert(vc);
2133 
2134 	node = md_add_node(md, "virtual-device-port");
2135 	md_link_node(md, vc, node);
2136 	md_add_prop_str(md, node, "name", "vldc-port");
2137 	md_add_prop_val(md, node, "id", id);
2138 	xasprintf(&str, "ldom-%s", domain);
2139 	md_add_prop_str(md, node, "vldc-svc-name", str);
2140 	free(str);
2141 
2142 	child = guest_add_endpoint(guest, channel);
2143 	md_link_node(md, node, child);
2144 
2145 	return node;
2146 }
2147 
2148 struct guest *
2149 guest_create(const char *name)
2150 {
2151 	struct guest *guest;
2152 	struct guest *primary;
2153 	struct md_node *node;
2154 
2155 	primary = guest_lookup("primary");
2156 
2157 	guest = hvmd_add_guest(name);
2158 	guest->md = md_copy(protomd);
2159 
2160 	md_find_delete_node(guest->md, "dimm_configuration");
2161 	md_find_delete_node(guest->md, "platform_services");
2162 	md_collect_garbage(guest->md);
2163 
2164 	guest_set_domaining_enabled(guest);
2165 	guest_set_mac_address(guest);
2166 	guest_add_channel_devices(guest);
2167 	guest_add_domain_services_port(guest, 0);
2168 	guest_add_console_device(guest);
2169 	guest_add_devalias(guest, "virtual-console",
2170 	    "/virtual-devices/console@1");
2171 
2172 	guest_add_vcc_port(primary, NULL, guest->name, guest->gid - 1,
2173 	    guest->console->server_endpoint->channel);
2174 
2175 	guest_add_vc_port(primary, NULL, guest->name, guest->gid + 2,
2176 	    guest->domain_services.server_endpoint->channel);
2177 
2178 	node = md_find_node(guest->md, "root");
2179 	md_add_prop_val(guest->md, node, "reset-reason", 0);
2180 
2181 	return guest;
2182 }
2183 
2184 int
2185 guest_match_path(struct guest *guest, const char *path)
2186 {
2187 	struct subdevice *subdevice;
2188 	size_t len = strlen(path);
2189 
2190 	TAILQ_FOREACH(subdevice, &guest->subdevice_list, link) {
2191 		const char *path2 = subdevice->path;
2192 		size_t len2 = strlen(path2);
2193 
2194 		if (strncmp(path, path2, len < len2 ? len : len2) == 0)
2195 			return 1;
2196 	}
2197 
2198 	return 0;
2199 }
2200 
2201 void
2202 guest_prune_phys_io(struct guest *guest)
2203 {
2204 	const char compatible[] = "SUNW,sun4v-vpci";
2205 	struct md *md = guest->md;
2206 	struct md_node *node, *node2;
2207 	struct md_prop *prop, *prop2;
2208 	const char *device_type;
2209 	uint64_t cfg_handle;
2210 	char *path;
2211 
2212 	node = md_find_node(guest->md, "phys_io");
2213 	TAILQ_FOREACH_SAFE(prop, &node->prop_list, link, prop2) {
2214 		if (prop->tag == MD_PROP_ARC &&
2215 		    strcmp(prop->name->str, "fwd") == 0) {
2216 			node2 = prop->d.arc.node;
2217 			if (!md_get_prop_str(md, node2, "device-type",
2218 			    &device_type))
2219 				device_type = "unknown";
2220 			if (strcmp(device_type, "pciex") != 0) {
2221 				md_delete_node(md, node2);
2222 				continue;
2223 			}
2224 
2225 			if (!md_get_prop_val(md, node2, "cfg-handle",
2226 			    &cfg_handle)) {
2227 				md_delete_node(md, node2);
2228 				continue;
2229 			}
2230 
2231 			xasprintf(&path, "/@%llx", cfg_handle);
2232 			if (!guest_match_path(guest, path)) {
2233 				md_delete_node(md, node2);
2234 				continue;
2235 			}
2236 
2237 			md_set_prop_data(md, node2, "compatible",
2238 			    compatible, sizeof(compatible));
2239 			md_add_prop_val(md, node2, "virtual-root-complex", 1);
2240 			guest_prune_pcie(guest, node2, path);
2241 			free(path);
2242 
2243 			guest_add_vpcie(guest, cfg_handle);
2244 		}
2245 	}
2246 }
2247 
2248 void
2249 guest_prune_pcie(struct guest *guest, struct md_node *node, const char *path)
2250 {
2251 	struct md *md = guest->md;
2252 	struct md_node *node2;
2253 	struct md_prop *prop, *prop2;
2254 	uint64_t device_number;
2255 	char *path2;
2256 
2257 	TAILQ_FOREACH_SAFE(prop, &node->prop_list, link, prop2) {
2258 		if (prop->tag == MD_PROP_ARC &&
2259 		    strcmp(prop->name->str, "fwd") == 0) {
2260 			node2 = prop->d.arc.node;
2261 			if (strcmp(node2->name->str, "wart") == 0) {
2262 				md_delete_node(md, node2);
2263 				continue;
2264 			}
2265 			if (!md_get_prop_val(md, node2, "device-number",
2266 			    &device_number))
2267 				continue;
2268 			xasprintf(&path2, "%s/@%llx", path, device_number);
2269 			if (guest_match_path(guest, path2))
2270 				guest_prune_pcie(guest, node2, path2);
2271 			else
2272 				md_delete_node(md, node2);
2273 			free(path2);
2274 		}
2275 	}
2276 }
2277 
2278 void
2279 guest_add_vpcie(struct guest *guest, uint64_t cfghandle)
2280 {
2281 	struct device *device, *phys_device = NULL;
2282 	uint64_t resource_id;
2283 
2284 	for (resource_id = 0; resource_id < max_devices; resource_id++) {
2285 		if (pcie_busses[resource_id] &&
2286 		    pcie_busses[resource_id]->cfghandle == cfghandle) {
2287 			phys_device = pcie_busses[resource_id];
2288 			break;
2289 		}
2290 	}
2291 	if (phys_device == NULL)
2292 		errx(1, "no matching physical device");
2293 
2294 	for (resource_id = 0; resource_id < max_devices; resource_id++) {
2295 		if (pcie_busses[resource_id] == NULL)
2296 			break;
2297 	}
2298 	if (resource_id >= max_devices)
2299 		errx(1, "no available resource_id");
2300 
2301 	device = xzalloc(sizeof(*device));
2302 	device->gid = guest->gid;
2303 	device->cfghandle = cfghandle;
2304 	device->resource_id = resource_id;
2305 	device->rcid = phys_device->rcid;
2306 	device->virtual = 1;
2307 	device->guest = guest;
2308 
2309 	device->num_msi_eqs = phys_device->msi_eqs_per_vpci;
2310 	device->num_msis = phys_device->msis_per_vpci;
2311 	phys_device->msi_base -= phys_device->msis_per_vpci;
2312 	device->msi_ranges[0] = phys_device->msi_base;
2313 	device->msi_ranges[1] = device->num_msis;
2314 
2315 	pcie_busses[resource_id] = device;
2316 	TAILQ_INSERT_TAIL(&guest->device_list, device, link);
2317 }
2318 
2319 void
2320 guest_fixup_phys_io(struct guest *guest)
2321 {
2322 	struct md *md = guest->md;
2323 	struct md_node *node, *node2;
2324 	struct md_prop *prop, *prop2;
2325 	struct device *device;
2326 	uint64_t cfg_handle;
2327 	uint64_t mapping[3];
2328 	const void *buf;
2329 	size_t len;
2330 
2331 	if (!directio_capability)
2332 		return;
2333 
2334 	node = md_find_node(guest->md, "phys_io");
2335 	TAILQ_FOREACH_SAFE(prop, &node->prop_list, link, prop2) {
2336 		if (prop->tag == MD_PROP_ARC &&
2337 		    strcmp(prop->name->str, "fwd") == 0) {
2338 			node2 = prop->d.arc.node;
2339 
2340 			if (!md_get_prop_val(md, node2, "cfg-handle",
2341 			    &cfg_handle))
2342 				continue;
2343 
2344 			TAILQ_FOREACH(device, &guest->device_list, link) {
2345 				if (device->cfghandle == cfg_handle)
2346 					break;
2347 			}
2348 			if (device == NULL)
2349 				continue;
2350 
2351 			md_set_prop_val(md, node2, "#msi-eqs",
2352 			    device->num_msi_eqs);
2353 			md_set_prop_val(md, node2, "#msi",
2354 			    device->num_msis);
2355 			md_set_prop_data(md, node2, "msi-ranges",
2356 			    (void *)device->msi_ranges,
2357 			    sizeof(device->msi_ranges));
2358 
2359 			md_get_prop_data(md, node2, "msi-eq-to-devino",
2360 			    &buf, &len);
2361 			memcpy(mapping, buf, sizeof(mapping));
2362 			mapping[1] = device->num_msi_eqs;
2363 			md_set_prop_data(md, node2, "msi-eq-to-devino",
2364 			    (void *)mapping, sizeof(mapping));
2365 		}
2366 	}
2367 }
2368 
2369 struct guest *
2370 guest_lookup(const char *name)
2371 {
2372 	uint64_t resource_id;
2373 
2374 	for (resource_id = 0; resource_id < max_guests; resource_id++) {
2375 		if (guests[resource_id] &&
2376 		    strcmp(guests[resource_id]->name, name) == 0)
2377 			return guests[resource_id];
2378 	}
2379 
2380 	return NULL;
2381 }
2382 
2383 void
2384 guest_delete_virtual_device_port(struct guest *guest, struct md_node *port)
2385 {
2386 	struct md *md = guest->md;
2387 	struct md_node *node;
2388 	struct md_prop *prop;
2389 
2390 	TAILQ_FOREACH(node, &md->node_list, link) {
2391 		if (strcmp(node->name->str, "virtual-device-port") != 0)
2392 			continue;
2393 		TAILQ_FOREACH(prop, &node->prop_list, link) {
2394 			if (prop->tag == MD_PROP_ARC &&
2395 			    prop->d.arc.node == port) {
2396 				md_delete_node(md, node);
2397 				return;
2398 			}
2399 		}
2400 	}
2401 }
2402 
2403 void
2404 guest_delete_endpoint(struct guest *guest, struct ldc_endpoint *endpoint)
2405 {
2406 	struct md *md = guest->md;
2407 	struct md_node *node, *node2;
2408 	struct md_prop *prop;
2409 	uint64_t id, resource_id;
2410 
2411 	node = md_find_node(md, "channel-endpoints");
2412 	TAILQ_FOREACH(prop, &node->prop_list, link) {
2413 		if (prop->tag == MD_PROP_ARC &&
2414 		    strcmp(prop->name->str, "fwd") == 0) {
2415 			node2 = prop->d.arc.node;
2416 			if (!md_get_prop_val(hvmd, node2, "id", &id))
2417 				continue;
2418 			if (id == endpoint->channel) {
2419 				guest_delete_virtual_device_port(guest, node2);
2420 				md_delete_node(md, node2);
2421 				break;
2422 			}
2423 		}
2424 	}
2425 
2426 	TAILQ_REMOVE(&guest->endpoint_list, endpoint, link);
2427 	ldc_endpoints[endpoint->resource_id] = NULL;
2428 
2429 	/* Delete peer as well. */
2430 	for (resource_id = 0; resource_id < max_guest_ldcs; resource_id++) {
2431 		struct ldc_endpoint *peer = ldc_endpoints[resource_id];
2432 
2433 		if (peer && peer->target_type == LDC_GUEST &&
2434 		    peer->target_channel == endpoint->channel &&
2435 		    peer->channel == endpoint->target_channel &&
2436 		    peer->target_guest == guest->gid)
2437 			guest_delete_endpoint(peer->guest, peer);
2438 	}
2439 
2440 	free(endpoint);
2441 }
2442 
2443 void
2444 guest_delete(struct guest *guest)
2445 {
2446 	struct cpu *cpu, *cpu2;
2447 	struct mblock *mblock, *mblock2;
2448 	struct ldc_endpoint *endpoint, *endpoint2;
2449 
2450 	consoles[guest->console->resource_id] = NULL;
2451 	free(guest->console);
2452 
2453 	TAILQ_FOREACH_SAFE(cpu, &guest->cpu_list, link, cpu2) {
2454 		TAILQ_REMOVE(&guest->cpu_list, cpu, link);
2455 		cpus[cpu->resource_id] = NULL;
2456 		pri_free_cpu(cpu);
2457 	}
2458 
2459 	TAILQ_FOREACH_SAFE(mblock, &guest->mblock_list, link, mblock2) {
2460 		TAILQ_REMOVE(&guest->mblock_list, mblock, link);
2461 		mblocks[mblock->resource_id] = NULL;
2462 		free(mblock);
2463 	}
2464 
2465 	TAILQ_FOREACH_SAFE(endpoint, &guest->endpoint_list, link, endpoint2)
2466 		guest_delete_endpoint(guest, endpoint);
2467 
2468 	hvmd_free_frag(guest->mdpa);
2469 
2470 	guests[guest->resource_id] = NULL;
2471 	free(guest);
2472 }
2473 
2474 void
2475 guest_delete_cpu(struct guest *guest, uint64_t vid)
2476 {
2477 	struct cpu *cpu;
2478 
2479 	TAILQ_FOREACH(cpu, &guest->cpu_list, link) {
2480 		if (cpu->vid == vid) {
2481 			TAILQ_REMOVE(&guest->cpu_list, cpu, link);
2482 			cpus[cpu->resource_id] = NULL;
2483 			pri_free_cpu(cpu);
2484 			return;
2485 		}
2486 	}
2487 }
2488 
2489 void
2490 guest_add_cpu(struct guest *guest, uint64_t stride)
2491 {
2492 	struct cpu *cpu;
2493 
2494 	cpu = pri_alloc_cpu(-1);
2495 
2496 	/*
2497 	 * Allocate (but don't assign) additional virtual CPUs if the
2498 	 * specified stride is bigger than one.
2499 	 */
2500 	while (stride-- > 1)
2501 		pri_alloc_cpu(-1);
2502 
2503 	if (cpu->resource_id == -1) {
2504 		uint64_t resource_id;
2505 
2506 		for (resource_id = 0; resource_id < max_cpus; resource_id++)
2507 			if (cpus[resource_id] == NULL)
2508 				break;
2509 		assert(resource_id < max_cpus);
2510 		cpu->resource_id = resource_id;
2511 	}
2512 	cpus[cpu->resource_id] = cpu;
2513 
2514 	cpu->vid = guest->cpu_vid++;
2515 	cpu->gid = guest->gid;
2516 	cpu->partid = 1;
2517 
2518 	TAILQ_INSERT_TAIL(&guest->cpu_list, cpu, link);
2519 	cpu->guest = guest;
2520 }
2521 
2522 void
2523 guest_delete_memory(struct guest *guest)
2524 {
2525 	struct mblock *mblock, *tmp;
2526 
2527 	TAILQ_FOREACH_SAFE(mblock, &guest->mblock_list, link, tmp) {
2528 		if (mblock->resource_id != -1)
2529 			mblocks[mblock->resource_id] = NULL;
2530 		TAILQ_REMOVE(&guest->mblock_list, mblock, link);
2531 		free(mblock);
2532 	}
2533 }
2534 
2535 void
2536 guest_add_memory(struct guest *guest, uint64_t base, uint64_t size)
2537 {
2538 	struct mblock *mblock;
2539 	uint64_t resource_id;
2540 
2541 	mblock = pri_alloc_memory(base, size);
2542 	if (mblock == NULL)
2543 		errx(1, "unable to allocate guest memory");
2544 	for (resource_id = 0; resource_id < max_cpus; resource_id++)
2545 		if (mblocks[resource_id] == NULL)
2546 			break;
2547 	assert(resource_id < max_mblocks);
2548 	mblock->resource_id = resource_id;
2549 	mblocks[resource_id] = mblock;
2550 
2551 	mblock->realbase = mblock->membase & (max_page_size - 1);
2552 	if (mblock->realbase == 0)
2553 		mblock->realbase = max_page_size;
2554 
2555 	TAILQ_INSERT_TAIL(&guest->mblock_list, mblock, link);
2556 	mblock->guest = guest;
2557 }
2558 
2559 void
2560 guest_add_vdisk(struct guest *guest, uint64_t id, const char *path,
2561     const char *user_devalias)
2562 {
2563 	struct guest *primary;
2564 	struct ldc_channel *lc;
2565 	char *devalias;
2566 	char *devpath;
2567 
2568 	primary = guest_lookup("primary");
2569 
2570 	lc = hvmd_add_vio(guest);
2571 	guest_add_vds_port(primary, NULL, path, id,
2572 	    lc->server_endpoint->channel);
2573 	guest_add_vdc_port(guest, NULL, id, 0, lc->client_endpoint->channel);
2574 
2575 	xasprintf(&devalias, "disk%d", id);
2576 	xasprintf(&devpath,
2577 	    "/virtual-devices@100/channel-devices@200/disk@%d", id);
2578 	if (id == 0)
2579 		guest_add_devalias(guest, "disk", devpath);
2580 	guest_add_devalias(guest, devalias, devpath);
2581 	if (user_devalias != NULL)
2582 		guest_add_devalias(guest, user_devalias, devpath);
2583 	free(devalias);
2584 	free(devpath);
2585 }
2586 
2587 void
2588 guest_add_vnetwork(struct guest *guest, uint64_t id, uint64_t mac_addr,
2589     uint64_t mtu, const char *user_devalias)
2590 {
2591 	struct guest *primary;
2592 	struct ldc_channel *lc;
2593 	char *devalias;
2594 	char *devpath;
2595 	struct md_node *node;
2596 	uint64_t remote_mac_addr = -1;
2597 
2598 	primary = guest_lookup("primary");
2599 
2600 	lc = hvmd_add_vio(guest);
2601 	guest_add_vsw_port(primary, NULL, id, lc->server_endpoint->channel);
2602 	node = guest_find_vsw(primary);
2603 	md_get_prop_val(primary->md, node, "local-mac-address", &remote_mac_addr);
2604 	guest_add_vnet_port(guest, NULL, mac_addr, remote_mac_addr, mtu, id, 0,
2605 	    lc->client_endpoint->channel);
2606 
2607 	xasprintf(&devalias, "net%d", id);
2608 	xasprintf(&devpath,
2609 	    "/virtual-devices@100/channel-devices@200/network@%d", id);
2610 	if (id == 0)
2611 		guest_add_devalias(guest, "net", devpath);
2612 	guest_add_devalias(guest, devalias, devpath);
2613 	if (user_devalias != NULL)
2614 		guest_add_devalias(guest, user_devalias, devpath);
2615 	free(devalias);
2616 	free(devpath);
2617 }
2618 
2619 void
2620 guest_add_variable(struct guest *guest, const char *name, const char *str)
2621 {
2622 	struct md *md = guest->md;
2623 	struct md_node *parent;
2624 	struct md_node *node;
2625 
2626 	node = md_find_node(md, "variables");
2627 	if (node == NULL) {
2628 		parent = md_find_node(md, "root");
2629 		assert(parent);
2630 
2631 		node = md_add_node(md, "variables");
2632 		md_link_node(md, parent, node);
2633 	}
2634 
2635 	md_add_prop_str(md, node, name, str);
2636 }
2637 
2638 void
2639 guest_add_iodev(struct guest *guest, const char *path)
2640 {
2641 	struct component *component;
2642 	struct subdevice *subdevice;
2643 
2644 	if (!directio_capability)
2645 		errx(1, "direct I/O not supported by hypervisor");
2646 
2647 	TAILQ_FOREACH(component, &components, link) {
2648 		if (strcmp(component->path, path) == 0)
2649 			break;
2650 	}
2651 
2652 	if (component == NULL)
2653 		errx(1, "incorrect device path %s", path);
2654 	if (component->assigned)
2655 		errx(1, "device path %s already assigned", path);
2656 
2657 	subdevice = xzalloc(sizeof(*subdevice));
2658 	subdevice->path = path;
2659 	TAILQ_INSERT_TAIL(&guest->subdevice_list, subdevice, link);
2660 	component->assigned = 1;
2661 }
2662 
2663 struct cpu *
2664 guest_find_cpu(struct guest *guest, uint64_t pid)
2665 {
2666 	struct cpu *cpu;
2667 
2668 	TAILQ_FOREACH(cpu, &guest->cpu_list, link)
2669 		if (cpu->pid == pid)
2670 			return cpu;
2671 
2672 	return NULL;
2673 }
2674 
2675 void
2676 guest_finalize(struct guest *guest)
2677 {
2678 	struct md *md = guest->md;
2679 	struct md_node *node, *node2;
2680 	struct md_prop *prop, *prop2;
2681 	struct mblock *mblock;
2682 	struct md_node *parent;
2683 	struct md_node *child;
2684 	struct cpu *cpu;
2685 	uint64_t pid;
2686 	const char *name;
2687 	char *path;
2688 
2689 	node = md_find_node(md, "cpus");
2690 	TAILQ_FOREACH_SAFE(prop, &node->prop_list, link, prop2) {
2691 		if (prop->tag == MD_PROP_ARC &&
2692 		    strcmp(prop->name->str, "fwd") == 0) {
2693 			node2 = prop->d.arc.node;
2694 			if (!md_get_prop_val(md, node2, "pid", &pid))
2695 				if (!md_get_prop_val(md, node2, "id", &pid))
2696 					continue;
2697 			cpu = guest_find_cpu(guest, pid);
2698 			if (cpu == NULL) {
2699 				md_delete_node(md, node2);
2700 				continue;
2701 			}
2702 			md_set_prop_val(md, node2, "id", cpu->vid);
2703 		}
2704 	}
2705 
2706 	/*
2707 	 * We don't support crypto units yet, so delete any "ncp" and
2708 	 * "n2cp" nodes.  If we don't, Solaris whines about not being
2709 	 * able to configure crypto work queues.
2710 	 */
2711 	node = md_find_node(md, "virtual-devices");
2712 	TAILQ_FOREACH_SAFE(prop, &node->prop_list, link, prop2) {
2713 		if (prop->tag == MD_PROP_ARC &&
2714 		    strcmp(prop->name->str, "fwd") == 0) {
2715 			node2 = prop->d.arc.node;
2716 			if (!md_get_prop_str(md, node2, "name", &name))
2717 				continue;
2718 			if (strcmp(name, "ncp") == 0)
2719 				md_delete_node(md, node2);
2720 			if (strcmp(name, "n2cp") == 0)
2721 				md_delete_node(md, node2);
2722 		}
2723 	}
2724 
2725 	node = md_find_node(md, "memory");
2726 	TAILQ_FOREACH_SAFE(prop, &node->prop_list, link, prop2) {
2727 		if (prop->tag == MD_PROP_ARC &&
2728 		    strcmp(prop->name->str, "fwd") == 0) {
2729 			node2 = prop->d.arc.node;
2730 			md_delete_node(md, node2);
2731 		}
2732 	}
2733 
2734 	if (strcmp(guest->name, "primary") != 0)
2735 		guest_prune_phys_io(guest);
2736 	guest_fixup_phys_io(guest);
2737 
2738 	md_collect_garbage(md);
2739 
2740 	parent = md_find_node(md, "memory");
2741 	TAILQ_FOREACH(mblock, &guest->mblock_list, link) {
2742 		child = md_add_node(md, "mblock");
2743 		md_add_prop_val(md, child, "base", mblock->realbase);
2744 		md_add_prop_val(md, child, "size", mblock->memsize);
2745 		md_link_node(md, parent, child);
2746 	}
2747 
2748 	xasprintf(&path, "%s.md", guest->name);
2749 	md_write(guest->md, path);
2750 	free(path);
2751 }
2752 
/*
 * Look up the "primary" guest, which must exist, set
 * "domaining-enabled" in its machine description and return it.
 */
struct guest *
primary_init(void)
{
	struct guest *primary = guest_lookup("primary");

	assert(primary);
	guest_set_domaining_enabled(primary);

	return primary;
}
2765 
2766 void
2767 build_config(const char *filename, int noaction)
2768 {
2769 	struct guest *primary;
2770 	struct guest *guest;
2771 	struct ldc_endpoint *endpoint;
2772 	struct component *component;
2773 	uint64_t resource_id;
2774 	int i;
2775 
2776 	struct ldom_config conf;
2777 	struct domain *domain;
2778 	struct vdisk *vdisk;
2779 	struct vnet *vnet;
2780 	struct var *var;
2781 	struct iodev *iodev;
2782 	uint64_t num_cpus = 0, primary_num_cpus = 0;
2783 	uint64_t primary_stride = 1;
2784 	uint64_t memory = 0, primary_memory = 0;
2785 
2786 	SIMPLEQ_INIT(&conf.domain_list);
2787 	if (parse_config(filename, &conf) < 0)
2788 		exit(1);
2789 	if (noaction)
2790 		exit(0);
2791 
2792 	pri = md_read("pri");
2793 	if (pri == NULL)
2794 		err(1, "unable to get PRI");
2795 	hvmd = md_read("hv.md");
2796 	if (hvmd == NULL)
2797 		err(1, "unable to get Hypervisor MD");
2798 
2799 	pri_init(pri);
2800 	pri_alloc_memory(hv_membase, hv_memsize);
2801 
2802 	SIMPLEQ_FOREACH(domain, &conf.domain_list, entry) {
2803 		if (strcmp(domain->name, "primary") == 0) {
2804 			primary_num_cpus = domain->vcpu;
2805 			primary_stride = domain->vcpu_stride;
2806 			primary_memory = domain->memory;
2807 		}
2808 		num_cpus += (domain->vcpu * domain->vcpu_stride);
2809 		memory += domain->memory;
2810 	}
2811 	if (primary_num_cpus == 0 && total_cpus > num_cpus)
2812 		primary_num_cpus = total_cpus - num_cpus;
2813 	if (primary_memory == 0 && total_memory > memory)
2814 		primary_memory = total_memory - memory;
2815 	if (num_cpus > total_cpus || primary_num_cpus == 0)
2816 		errx(1, "not enough VCPU resources available");
2817 	if (memory > total_memory || primary_memory == 0)
2818 		errx(1, "not enough memory available");
2819 
2820 	hvmd_init(hvmd);
2821 	primary = primary_init();
2822 
2823 	for (resource_id = 0; resource_id <max_guests; resource_id++)
2824 		if (guests[resource_id] &&
2825 		    strcmp(guests[resource_id]->name, "primary") != 0)
2826 			guest_delete(guests[resource_id]);
2827 
2828 	primary->endpoint_id = 0;
2829 	TAILQ_FOREACH(endpoint, &primary->endpoint_list, link) {
2830 		if (endpoint->channel >= primary->endpoint_id)
2831 			primary->endpoint_id = endpoint->channel + 1;
2832 	}
2833 
2834 	for (i = 0; i < max_cpus; i++)
2835 		guest_delete_cpu(primary, i);
2836 	for (i = 0; i < primary_num_cpus; i++)
2837 		guest_add_cpu(primary, primary_stride);
2838 	guest_delete_memory(primary);
2839 	guest_add_memory(primary, -1, primary_memory);
2840 
2841 	SIMPLEQ_FOREACH(domain, &conf.domain_list, entry) {
2842 		if (strcmp(domain->name, "primary") != 0)
2843 			continue;
2844 		SIMPLEQ_FOREACH(var, &domain->var_list, entry)
2845 			guest_add_variable(primary, var->name, var->str);
2846 	}
2847 
2848 	SIMPLEQ_FOREACH(domain, &conf.domain_list, entry) {
2849 		if (strcmp(domain->name, "primary") == 0)
2850 			continue;
2851 		guest = guest_create(domain->name);
2852 		for (i = 0; i < domain->vcpu; i++)
2853 			guest_add_cpu(guest, domain->vcpu_stride);
2854 		guest_add_memory(guest, -1, domain->memory);
2855 		i = 0;
2856 		SIMPLEQ_FOREACH(vdisk, &domain->vdisk_list, entry)
2857 			guest_add_vdisk(guest, i++, vdisk->path,
2858 			    vdisk->devalias);
2859 		i = 0;
2860 		SIMPLEQ_FOREACH(vnet, &domain->vnet_list, entry)
2861 			guest_add_vnetwork(guest, i++, vnet->mac_addr,
2862 			    vnet->mtu, vnet->devalias);
2863 		SIMPLEQ_FOREACH(var, &domain->var_list, entry)
2864 			guest_add_variable(guest, var->name, var->str);
2865 		SIMPLEQ_FOREACH(iodev, &domain->iodev_list, entry)
2866 			guest_add_iodev(guest, iodev->path);
2867 
2868 		guest_finalize(guest);
2869 	}
2870 
2871 	TAILQ_FOREACH(component, &components, link) {
2872 		if (component->assigned)
2873 			continue;
2874 		guest_add_iodev(primary, component->path);
2875 	}
2876 
2877 	guest_finalize(primary);
2878 	hvmd_finalize();
2879 }
2880 
2881 void
2882 list_components(void)
2883 {
2884 	struct component *component;
2885 
2886 	pri = md_read("pri");
2887 	if (pri == NULL)
2888 		err(1, "unable to get PRI");
2889 
2890 	pri_init(pri);
2891 
2892 	TAILQ_FOREACH(component, &components, link) {
2893 		printf("%s\n", component->path);
2894 	}
2895 }
2896