xref: /onnv-gate/usr/src/cmd/mdb/intel/modules/mdb_kb/mdb_kb.c (revision 10175:dd9708d1f561)
15084Sjohnlev /*
25084Sjohnlev  * CDDL HEADER START
35084Sjohnlev  *
45084Sjohnlev  * The contents of this file are subject to the terms of the
55084Sjohnlev  * Common Development and Distribution License (the "License").
65084Sjohnlev  * You may not use this file except in compliance with the License.
75084Sjohnlev  *
85084Sjohnlev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
95084Sjohnlev  * or http://www.opensolaris.org/os/licensing.
105084Sjohnlev  * See the License for the specific language governing permissions
115084Sjohnlev  * and limitations under the License.
125084Sjohnlev  *
135084Sjohnlev  * When distributing Covered Code, include this CDDL HEADER in each
145084Sjohnlev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
155084Sjohnlev  * If applicable, add the following below this CDDL HEADER, with the
165084Sjohnlev  * fields enclosed by brackets "[]" replaced with your own identifying
175084Sjohnlev  * information: Portions Copyright [yyyy] [name of copyright owner]
185084Sjohnlev  *
195084Sjohnlev  * CDDL HEADER END
205084Sjohnlev  */
215084Sjohnlev /*
22*10175SStuart.Maybee@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
235084Sjohnlev  * Use is subject to license terms.
245084Sjohnlev  */
255084Sjohnlev 
265084Sjohnlev /*
276144Srab  * KVM backend for hypervisor domain dumps.  We don't use libkvm for
286144Srab  * such dumps, since they do not have a namelist file or the typical
296144Srab  * dump structures we expect to aid bootstrapping.  Instead, we
306144Srab  * bootstrap based upon a debug_info structure at a known VA, using the
316144Srab  * guest's own page tables to resolve to physical addresses, and
326144Srab  * construct the namelist in a manner similar to ksyms_snapshot().
336144Srab  *
346144Srab  * Note that there are two formats understood by this module: the older,
356144Srab  * ad hoc format, which we call 'core' within this file, and an
366144Srab  * ELF-based format, known as 'elf'.
376144Srab  *
386144Srab  * We only support the older format generated on Solaris dom0: before we
396144Srab  * fixed it, core dump files were broken whenever a PFN didn't map a
406144Srab  * real MFN (!).
415084Sjohnlev  */
425084Sjohnlev 
435084Sjohnlev #include <strings.h>
445084Sjohnlev #include <stdio.h>
455084Sjohnlev #include <stdlib.h>
465084Sjohnlev #include <stddef.h>
475084Sjohnlev #include <stdarg.h>
485084Sjohnlev #include <unistd.h>
495084Sjohnlev #include <fcntl.h>
505084Sjohnlev #include <gelf.h>
515084Sjohnlev #include <errno.h>
525084Sjohnlev 
535084Sjohnlev #include <sys/mman.h>
545084Sjohnlev #include <sys/stat.h>
555084Sjohnlev #include <sys/debug_info.h>
565084Sjohnlev #include <sys/xen_mmu.h>
575084Sjohnlev #include <sys/elf.h>
585084Sjohnlev #include <sys/machelf.h>
595084Sjohnlev #include <sys/modctl.h>
605084Sjohnlev #include <sys/kobj.h>
615084Sjohnlev #include <sys/kobj_impl.h>
625084Sjohnlev #include <sys/sysmacros.h>
635084Sjohnlev #include <sys/privmregs.h>
645084Sjohnlev #include <vm/as.h>
655084Sjohnlev 
665084Sjohnlev #include <mdb/mdb_io.h>
675084Sjohnlev #include <mdb/mdb_kb.h>
685084Sjohnlev #include <mdb/mdb_target_impl.h>
695084Sjohnlev 
705084Sjohnlev #include <xen/public/xen.h>
716144Srab #include <xen/public/version.h>
726144Srab #include <xen/public/elfnote.h>
735084Sjohnlev 
/*
 * Indices into xkb_namelist_t.kh_shdr[]; XKB_SHDR_NUM is the size of that
 * array.
 */
745084Sjohnlev #define	XKB_SHDR_NULL 0
755084Sjohnlev #define	XKB_SHDR_SYMTAB 1
765084Sjohnlev #define	XKB_SHDR_STRTAB 2
775084Sjohnlev #define	XKB_SHDR_SHSTRTAB 3
785084Sjohnlev #define	XKB_SHDR_NUM 4
795084Sjohnlev 
/*
 * Independent flag bits; XKB_WALK_ALL is the union of the three.
 * NOTE(review): presumably the 'types' argument of xkb_read_modsyms() -
 * confirm against that function.
 */
805084Sjohnlev #define	XKB_WALK_LOCAL 0x1
815084Sjohnlev #define	XKB_WALK_GLOBAL 0x2
825084Sjohnlev #define	XKB_WALK_STR 0x4
835084Sjohnlev #define	XKB_WALK_ALL (XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR)
845084Sjohnlev 
/*
 * Per-architecture addresses.  NOTE(review): presumably the "known VA" of
 * the debug_info structure mentioned in the file header comment, for PV
 * and HVM guests respectively - confirm where these are consumed.
 */
856144Srab #if defined(__i386)
866144Srab #define	DEBUG_INFO 0xf4bff000
877486Sjohn.levon@sun.com #define	DEBUG_INFO_HVM 0xfe7ff000
886144Srab #elif defined(__amd64)
896144Srab #define	DEBUG_INFO 0xfffffffffb7ff000
907486Sjohn.levon@sun.com #define	DEBUG_INFO_HVM 0xfffffffffb7ff000
916144Srab #endif
926144Srab 
/*
 * 4K page arithmetic: PAGE_OFFSET() keeps the low 12 bits of an address,
 * PAGE_MASK() clears them, and PAGE_ALIGNED() tests that they are zero.
 *
 * PT_PADDR and PT_PADDR_LGPG are the masks xkb_pte_to_base_mfn() applies to
 * a PTE before shifting it down to a frame number; the large-page variant
 * additionally clears bit 12.  PT_PAGESIZE is the PTE bit that, at any
 * level above 0, makes PTE_IS_LGPG() treat the entry as a large-page
 * mapping.  PT_VALID is bit 0 of a PTE.
 */
935084Sjohnlev #define	PAGE_SIZE 0x1000
945084Sjohnlev #define	PAGE_SHIFT 12
955084Sjohnlev #define	PAGE_OFFSET(a) ((a) & (PAGE_SIZE - 1))
965084Sjohnlev #define	PAGE_MASK(a) ((a) & ~(PAGE_SIZE - 1))
976144Srab #define	PAGE_ALIGNED(a) (((a) & (PAGE_SIZE -1)) == 0)
987486Sjohn.levon@sun.com #define	PT_PADDR_LGPG 0x000fffffffffe000ull
995084Sjohnlev #define	PT_PADDR 0x000ffffffffff000ull
1005084Sjohnlev #define	PT_VALID 0x1
1017486Sjohn.levon@sun.com #define	PT_PAGESIZE 0x080
1027486Sjohn.levon@sun.com #define	PTE_IS_LGPG(p, l) ((l) > 0 && ((p) & PT_PAGESIZE))
1035084Sjohnlev 
/*
 * XC_CORE_MAGIC is the xch_magic value that xkb_identify() accepts as an
 * old-format ('core') dump.  NOTE(review): XC_CORE_MAGIC_HVM is presumably
 * the HVM counterpart - it is not tested by xkb_identify().
 */
1045084Sjohnlev #define	XC_CORE_MAGIC 0xF00FEBED
1055084Sjohnlev #define	XC_CORE_MAGIC_HVM 0xF00FEBEE
1065084Sjohnlev 
/* NOTE(review): presumably a vcpu_guest_context flags bit - confirm. */
1075084Sjohnlev #define	VGCF_HVM_GUEST (1<<1)
1085084Sjohnlev 
/*
 * Header found at offset 0 of an old-format ('core') dump file.
 *
 * xkb_identify() recognises the format by xch_magic == XC_CORE_MAGIC and
 * compares (xch_index_offset - xch_ctxt_offset) against
 * xch_nr_vcpus * sizeof (struct vcpu_guest_context) to guess the word size.
 * xkb_map_p2m() maps the p2m table starting at file offset
 * xch_index_offset.
 * NOTE(review): xch_nr_pages and xch_pages_offset are presumably the page
 * count and the file offset of the page data - confirm where they are
 * consumed.
 */
1095084Sjohnlev typedef struct xc_core_header {
1105084Sjohnlev 	unsigned int xch_magic;
1115084Sjohnlev 	unsigned int xch_nr_vcpus;
1125084Sjohnlev 	unsigned int xch_nr_pages;
1135084Sjohnlev 	unsigned int xch_ctxt_offset;
1145084Sjohnlev 	unsigned int xch_index_offset;
1155084Sjohnlev 	unsigned int xch_pages_offset;
1165084Sjohnlev } xc_core_header_t;
1175084Sjohnlev 
/*
 * Four 64-bit header fields kept per ELF-format dump (xkb_elf_t.xe_hdr).
 * NOTE(review): presumably the descriptor layout of a Xen dump-core header
 * note - confirm where it is read.
 */
1186144Srab struct xc_elf_header {
1196144Srab 	uint64_t xeh_magic;
1206144Srab 	uint64_t xeh_nr_vcpus;
1216144Srab 	uint64_t xeh_nr_pages;
1226144Srab 	uint64_t xeh_page_size;
1236144Srab };
1246144Srab 
/*
 * Descriptor layout of the XEN_ELFNOTE_DUMPCORE_XEN_VERSION note in the
 * .note.Xen section, as interpreted by xkb_identify().  Only
 * xev_capabilities is examined there: it is searched for the substrings
 * "x86_64" and "x86_32" to derive the word size of the dump.  Per the
 * comment in xkb_identify(), the 32-bit and 64-bit layouts only diverge
 * after xev_capabilities.
 */
1256144Srab struct xc_elf_version {
1266144Srab 	uint64_t xev_major;
1276144Srab 	uint64_t xev_minor;
1286144Srab 	xen_extraversion_t xev_extra;
1296144Srab 	xen_compile_info_t xev_compile_info;
1306144Srab 	xen_capabilities_info_t xev_capabilities;
1316144Srab 	xen_changeset_info_t xev_changeset;
1326144Srab 	xen_platform_parameters_t xev_platform_parameters;
1336144Srab 	uint64_t xev_pagesize;
1346144Srab };
1356144Srab 
1366144Srab /*
1376144Srab  * Either an old-style (3.0.4) core format, or the ELF format.
1386144Srab  */
/*
 * xkb_pfn_to_off() treats XKB_FORMAT_CORE specially (page offset is simply
 * pfn * PAGE_SIZE); any other value goes through xkb_elf_t.xe_off[].
 */
1396144Srab typedef enum {
1406144Srab 	XKB_FORMAT_UNKNOWN = 0,
1416144Srab 	XKB_FORMAT_CORE = 1,
1426144Srab 	XKB_FORMAT_ELF = 2
1436144Srab } xkb_type_t;
1446144Srab 
/*
 * One-entry mapping cache maintained by xkb_map_mfn(): mm_mfn is the MFN
 * most recently requested through this slot and mm_map the address at
 * which that page can be read (NULL or MAP_FAILED after a failed lookup).
 */
1455084Sjohnlev typedef struct mfn_map {
1465084Sjohnlev 	mfn_t mm_mfn;
1475084Sjohnlev 	char *mm_map;
1485084Sjohnlev } mfn_map_t;
1495084Sjohnlev 
/*
 * Page table geometry used by xkb_va_to_mfn() and xkb_get_pte():
 *
 *	mi_max		level at which the walk starts (it descends to 0)
 *	mi_shift[l]	right shift applied to a VA to get its index at
 *			level l
 *	mi_ptes		number of entries per table; used as (mi_ptes - 1)
 *			to mask the index, so it must be a power of two
 *	mi_ptesize	size of one entry in bytes; 8 selects a 64-bit
 *			load, anything else a 32-bit load
 */
1505084Sjohnlev typedef struct mmu_info {
1515084Sjohnlev 	size_t mi_max;
1525084Sjohnlev 	size_t mi_shift[4];
1535084Sjohnlev 	size_t mi_ptes;
1545084Sjohnlev 	size_t mi_ptesize;
1555084Sjohnlev } mmu_info_t;
1565084Sjohnlev 
/*
 * State specific to old-format ('core') dumps: the file header, and the
 * page-aligned base address returned by mmap() for the p2m table in
 * xkb_map_p2m() (MAP_FAILED if that mmap() failed).
 */
1576144Srab typedef struct xkb_core {
1586144Srab 	xc_core_header_t xc_hdr;
1596144Srab 	void *xc_p2m_buf;
1606144Srab } xkb_core_t;
1616144Srab 
/*
 * State specific to ELF-format dumps.
 *
 *	xe_gelf		handle used to look up and load the .xen_p2m and
 *			.xen_pfn sections
 *	xe_off		array indexed by PFN giving the index of that
 *			page within the dump's page data, or (size_t)-1
 *			if the PFN has no page in the dump
 *	xe_hdr,		NOTE(review): presumably copies of the dump's
 *	xe_version	header and version notes - confirm where filled in
 */
1626144Srab typedef struct xkb_elf {
1636144Srab 	mdb_gelf_file_t *xe_gelf;
1646144Srab 	size_t *xe_off;
1656144Srab 	struct xc_elf_header xe_hdr;
1666144Srab 	struct xc_elf_version xe_version;
1676144Srab } xkb_elf_t;
1686144Srab 
/*
 * Per-dump state.
 *
 *	xkb_path	path of the dump; prefixed to messages by xkb_fail()
 *	xkb_fd		descriptor used for mmap() and pread64() of the dump
 *	xkb_is_hvm	if set, %cr3 is converted to a frame number by a
 *			plain shift rather than xen_cr3_to_pfn()
 *	xkb_type	dump format; selects how xkb_pfn_to_off() works
 *	xkb_core,	format-specific state
 *	xkb_elf
 *	xkb_nr_pages	number of entries read from the p2m/pfn tables
 *	xkb_pages_off	file offset added to a page's relative offset by
 *			xkb_mfn_to_offset()
 *	xkb_max_pfn	largest valid index into xkb_p2m
 *	xkb_max_mfn	largest valid index into xkb_m2p
 *	xkb_mmu		page table geometry for xkb_va_to_mfn()
 *	xkb_info	debug_info; supplies di_hat_htable_off and
 *			di_ht_pfn_off to xkb_as_to_mfn()
 *	xkb_vcpus	per-vcpu contexts; vcpu 0's ctrlreg[3] names the
 *			default top-level page table
 *	xkb_pages	if non-NULL, base address at which the page data is
 *			directly addressable; if NULL the dump is accessed
 *			"windowed", one page at a time
 *	xkb_p2m		PFN -> MFN table (MFN_INVALID for no mapping)
 *	xkb_m2p		MFN -> PFN table (PFN_INVALID for no mapping)
 *	xkb_pt_map	one cached page mapping per page table level
 *	xkb_map		cached page mapping used for data reads
 *
 * NOTE(review): xkb_nr_vcpus, xkb_is_pae, xkb_vcpu_data{,_sz} and
 * xkb_namelist/xkb_namesize are not used by the code visible here;
 * document them against their users.
 */
1695084Sjohnlev typedef struct xkb {
1705084Sjohnlev 	char *xkb_path;
1715084Sjohnlev 	int xkb_fd;
1727486Sjohn.levon@sun.com 	int xkb_is_hvm;
1736144Srab 
1746144Srab 	xkb_type_t xkb_type;
1756144Srab 	xkb_core_t xkb_core;
1766144Srab 	xkb_elf_t xkb_elf;
1776144Srab 
1786144Srab 	size_t xkb_nr_vcpus;
1796144Srab 	size_t xkb_nr_pages;
1806144Srab 	size_t xkb_pages_off;
1816144Srab 	xen_pfn_t xkb_max_pfn;
1826144Srab 	mfn_t xkb_max_mfn;
1836144Srab 	int xkb_is_pae;
1846144Srab 
1856144Srab 	mmu_info_t xkb_mmu;
1866144Srab 	debug_info_t xkb_info;
1876144Srab 
188*10175SStuart.Maybee@Sun.COM 	void *xkb_vcpu_data;
189*10175SStuart.Maybee@Sun.COM 	size_t xkb_vcpu_data_sz;
190*10175SStuart.Maybee@Sun.COM 	struct vcpu_guest_context **xkb_vcpus;
1916144Srab 
1926144Srab 	char *xkb_pages;
1936144Srab 	mfn_t *xkb_p2m;
1946144Srab 	xen_pfn_t *xkb_m2p;
1956144Srab 	mfn_map_t xkb_pt_map[4];
1966144Srab 	mfn_map_t xkb_map;
1976144Srab 
1985084Sjohnlev 	char *xkb_namelist;
1995084Sjohnlev 	size_t xkb_namesize;
2005084Sjohnlev } xkb_t;
2015084Sjohnlev 
/*
 * Section name string table: an empty name followed by ".symtab",
 * ".strtab" and ".shstrtab", each NUL-terminated.
 */
2025084Sjohnlev static const char xkb_shstrtab[] = "\0.symtab\0.strtab\0.shstrtab\0";
2035084Sjohnlev 
/*
 * Fixed-size headers of the synthetic namelist: an ELF header, program
 * headers for text and data, XKB_SHDR_NUM section headers (indexed by the
 * XKB_SHDR_* constants) and room for a copy of xkb_shstrtab.
 * NOTE(review): presumably filled in by xkb_build_ksyms() - confirm.
 */
2045084Sjohnlev typedef struct xkb_namelist {
2055084Sjohnlev 	Ehdr	kh_elf_hdr;
2065084Sjohnlev 	Phdr	kh_text_phdr;
2075084Sjohnlev 	Phdr	kh_data_phdr;
2085084Sjohnlev 	Shdr	kh_shdr[XKB_SHDR_NUM];
2095084Sjohnlev 	char	shstrings[sizeof (xkb_shstrtab)];
2105084Sjohnlev } xkb_namelist_t;
2115084Sjohnlev 
2125084Sjohnlev static int xkb_build_ksyms(xkb_t *);
2135084Sjohnlev static offset_t xkb_mfn_to_offset(xkb_t *, mfn_t);
2145084Sjohnlev static mfn_t xkb_va_to_mfn(xkb_t *, uintptr_t, mfn_t);
2155084Sjohnlev static ssize_t xkb_read(xkb_t *, uintptr_t, void *, size_t);
2165084Sjohnlev static int xkb_read_word(xkb_t *, uintptr_t, uintptr_t *);
2175084Sjohnlev static char *xkb_map_mfn(xkb_t *, mfn_t, mfn_map_t *);
2185084Sjohnlev static int xkb_close(xkb_t *);
2195084Sjohnlev 
2206144Srab /*
2216144Srab  * Jump through the hoops we need to to correctly identify a core file
2226144Srab  * of either the old or new format.
2236144Srab  */
2245084Sjohnlev int
xkb_identify(const char * file,int * longmode)2255084Sjohnlev xkb_identify(const char *file, int *longmode)
2265084Sjohnlev {
2275084Sjohnlev 	xc_core_header_t header;
2286144Srab 	mdb_gelf_file_t *gf = NULL;
2296144Srab 	mdb_gelf_sect_t *sect = NULL;
2306144Srab 	mdb_io_t *io = NULL;
2316144Srab 	char *notes = NULL;
2326144Srab 	char *pos;
2336144Srab 	int ret = 0;
2345084Sjohnlev 	size_t sz;
2355084Sjohnlev 	int fd;
2365084Sjohnlev 
2375084Sjohnlev 	if ((fd = open64(file, O_RDONLY)) == -1)
2385084Sjohnlev 		return (-1);
2395084Sjohnlev 
2405084Sjohnlev 	if (pread64(fd, &header, sizeof (header), 0) != sizeof (header)) {
2415084Sjohnlev 		(void) close(fd);
2425084Sjohnlev 		return (0);
2435084Sjohnlev 	}
2445084Sjohnlev 
2455084Sjohnlev 	(void) close(fd);
2465084Sjohnlev 
2476144Srab 	if (header.xch_magic == XC_CORE_MAGIC) {
2486144Srab 		*longmode = 0;
2495084Sjohnlev 
2506144Srab 		/*
2516144Srab 		 * Indeed.
2526144Srab 		 */
2536144Srab 		sz = header.xch_index_offset - header.xch_ctxt_offset;
2545084Sjohnlev #ifdef _LP64
2556144Srab 		if (sizeof (struct vcpu_guest_context) *
2566144Srab 		    header.xch_nr_vcpus == sz)
2576144Srab 			*longmode = 1;
2585084Sjohnlev #else
2596144Srab 		if (sizeof (struct vcpu_guest_context) *
2606144Srab 		    header.xch_nr_vcpus != sz)
2616144Srab 			*longmode = 1;
2625084Sjohnlev #endif /* _LP64 */
2635084Sjohnlev 
2646144Srab 		return (1);
2656144Srab 	}
2666144Srab 
2676144Srab 	if ((io = mdb_fdio_create_path(NULL, file, O_RDONLY, 0)) == NULL)
2686144Srab 		return (-1);
2696144Srab 
2706144Srab 	if ((gf = mdb_gelf_create(io, ET_NONE, GF_FILE)) == NULL)
2716144Srab 		goto out;
2726144Srab 
2736144Srab 	if ((sect = mdb_gelf_sect_by_name(gf, ".note.Xen")) == NULL)
2746144Srab 		goto out;
2756144Srab 
2766144Srab 	if ((notes = mdb_gelf_sect_load(gf, sect)) == NULL)
2776144Srab 		goto out;
2786144Srab 
2796144Srab 	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
2806144Srab 		struct xc_elf_version *vers;
2816144Srab 		/* LINTED - alignment */
2826144Srab 		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
2836144Srab 		char *desc;
2846144Srab 		char *name;
2856144Srab 
2866144Srab 		name = pos + sizeof (*nhdr);
2876144Srab 		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);
2886144Srab 
2896144Srab 		pos = desc + nhdr->n_descsz;
2906144Srab 
2916144Srab 		if (nhdr->n_type != XEN_ELFNOTE_DUMPCORE_XEN_VERSION)
2926144Srab 			continue;
2936144Srab 
2946144Srab 		/*
2956144Srab 		 * The contents of this struct differ between 32 and 64
2966144Srab 		 * bit; however, not until past the 'xev_capabilities'
2976144Srab 		 * member, so we can just about get away with this.
2986144Srab 		 */
2996144Srab 
3006144Srab 		/* LINTED - alignment */
3016144Srab 		vers = (struct xc_elf_version *)desc;
3026144Srab 
3036144Srab 		if (strstr(vers->xev_capabilities, "x86_64")) {
304*10175SStuart.Maybee@Sun.COM 			/*
305*10175SStuart.Maybee@Sun.COM 			 * 64-bit hypervisor, but it can still be
306*10175SStuart.Maybee@Sun.COM 			 * a 32-bit domain core. 32-bit domain cores
307*10175SStuart.Maybee@Sun.COM 			 * are also dumped in Elf64 format, but they
308*10175SStuart.Maybee@Sun.COM 			 * have e_machine set to EM_386, not EM_AMD64.
309*10175SStuart.Maybee@Sun.COM 			 */
310*10175SStuart.Maybee@Sun.COM 			if (gf->gf_ehdr.e_machine == EM_386)
311*10175SStuart.Maybee@Sun.COM 				*longmode = 0;
312*10175SStuart.Maybee@Sun.COM 			else
313*10175SStuart.Maybee@Sun.COM 				*longmode = 1;
3146144Srab 		} else if (strstr(vers->xev_capabilities, "x86_32") ||
3156144Srab 		    strstr(vers->xev_capabilities, "x86_32p")) {
316*10175SStuart.Maybee@Sun.COM 			/*
317*10175SStuart.Maybee@Sun.COM 			 * 32-bit hypervisor, can only be a 32-bit core.
318*10175SStuart.Maybee@Sun.COM 			 */
3196144Srab 			*longmode = 0;
3206144Srab 		} else {
3216144Srab 			mdb_warn("couldn't derive word size of dump; "
3226144Srab 			    "assuming 64-bit");
3236144Srab 			*longmode = 1;
3246144Srab 		}
3256144Srab 	}
3266144Srab 
3276144Srab 	ret = 1;
3286144Srab 
3296144Srab out:
3306144Srab 	if (gf != NULL)
3316144Srab 		mdb_gelf_destroy(gf);
3326144Srab 	else if (io != NULL)
3336144Srab 		mdb_io_destroy(io);
3346144Srab 	return (ret);
3355084Sjohnlev }
3365084Sjohnlev 
3375084Sjohnlev static void *
xkb_fail(xkb_t * xkb,const char * msg,...)3385084Sjohnlev xkb_fail(xkb_t *xkb, const char *msg, ...)
3395084Sjohnlev {
3405084Sjohnlev 	va_list args;
3415084Sjohnlev 
3425084Sjohnlev 	va_start(args, msg);
3435084Sjohnlev 	if (xkb != NULL)
3445084Sjohnlev 		(void) fprintf(stderr, "%s: ", xkb->xkb_path);
3455084Sjohnlev 	(void) vfprintf(stderr, msg, args);
3465084Sjohnlev 	(void) fprintf(stderr, "\n");
3475084Sjohnlev 	va_end(args);
3485084Sjohnlev 	if (xkb != NULL)
3495084Sjohnlev 		(void) xkb_close(xkb);
3506144Srab 
3516144Srab 	errno = ENOEXEC;
3526144Srab 
3535084Sjohnlev 	return (NULL);
3545084Sjohnlev }
3555084Sjohnlev 
3565084Sjohnlev static int
xkb_build_m2p(xkb_t * xkb)3575084Sjohnlev xkb_build_m2p(xkb_t *xkb)
3585084Sjohnlev {
3595084Sjohnlev 	size_t i;
3605084Sjohnlev 
3616144Srab 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
3625084Sjohnlev 		if (xkb->xkb_p2m[i] != MFN_INVALID &&
3635084Sjohnlev 		    xkb->xkb_p2m[i] > xkb->xkb_max_mfn)
3645084Sjohnlev 			xkb->xkb_max_mfn = xkb->xkb_p2m[i];
3655084Sjohnlev 	}
3665084Sjohnlev 
3675084Sjohnlev 	xkb->xkb_m2p = mdb_alloc((xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t),
3685084Sjohnlev 	    UM_SLEEP);
3695084Sjohnlev 
3705084Sjohnlev 	for (i = 0; i <= xkb->xkb_max_mfn; i++)
3715084Sjohnlev 		xkb->xkb_m2p[i] = PFN_INVALID;
3725084Sjohnlev 
3736144Srab 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
3745084Sjohnlev 		if (xkb->xkb_p2m[i] != MFN_INVALID)
3755084Sjohnlev 			xkb->xkb_m2p[xkb->xkb_p2m[i]] = i;
3765084Sjohnlev 	}
3775084Sjohnlev 
3785084Sjohnlev 	return (1);
3795084Sjohnlev }
3805084Sjohnlev 
3815084Sjohnlev /*
3826144Srab  * With FORMAT_CORE, we can use the table in the dump file directly.
3836144Srab  * Just to make things fun, they've not page-aligned the p2m table.
3845084Sjohnlev  */
3855084Sjohnlev static int
xkb_map_p2m(xkb_t * xkb)3865084Sjohnlev xkb_map_p2m(xkb_t *xkb)
3875084Sjohnlev {
3885084Sjohnlev 	offset_t off;
3895084Sjohnlev 	size_t size;
3906144Srab 	xkb_core_t *xc = &xkb->xkb_core;
3916144Srab 	size_t count = xkb->xkb_nr_pages;
3926144Srab 	size_t boff = xc->xc_hdr.xch_index_offset;
3935084Sjohnlev 
3946144Srab 	size = (sizeof (mfn_t) * count) + (PAGE_SIZE * 2);
3955084Sjohnlev 	size = PAGE_MASK(size);
3965084Sjohnlev 	off = PAGE_MASK(boff);
3975084Sjohnlev 
3985084Sjohnlev 	/* LINTED - alignment */
3996144Srab 	xc->xc_p2m_buf = (mfn_t *)mmap(NULL, size, PROT_READ,
4005084Sjohnlev 	    MAP_SHARED, xkb->xkb_fd, off);
4015084Sjohnlev 
4026144Srab 	if (xc->xc_p2m_buf == (xen_pfn_t *)MAP_FAILED) {
4035084Sjohnlev 		(void) xkb_fail(xkb, "cannot map p2m table");
4045084Sjohnlev 		return (0);
4055084Sjohnlev 	}
4065084Sjohnlev 
4075084Sjohnlev 	/* LINTED - alignment */
4086144Srab 	xkb->xkb_p2m = (mfn_t *)((char *)xc->xc_p2m_buf +
4095084Sjohnlev 	    PAGE_OFFSET(boff));
4105084Sjohnlev 
4115084Sjohnlev 	return (1);
4125084Sjohnlev }
4135084Sjohnlev 
4145084Sjohnlev /*
4156144Srab  * With FORMAT_ELF, we have a set of <pfn,mfn> pairs, which we convert
4166144Srab  * into a linear array indexed by pfn for convenience.  We also need to
4176144Srab  * track the mapping between mfn and the offset in the file: a pfn with
4186144Srab  * no mfn will not appear in the core file.
4196144Srab  */
4206144Srab static int
xkb_build_p2m(xkb_t * xkb)4216144Srab xkb_build_p2m(xkb_t *xkb)
4226144Srab {
4236144Srab 	xkb_elf_t *xe = &xkb->xkb_elf;
4246144Srab 	mdb_gelf_sect_t *sect;
4256144Srab 	size_t size;
4266144Srab 	size_t i;
4276144Srab 
4286144Srab 	struct elf_p2m {
4296144Srab 		uint64_t pfn;
4306144Srab 		uint64_t gmfn;
4316144Srab 	} *p2m;
4326144Srab 
4336144Srab 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_p2m");
4346144Srab 
4356144Srab 	if (sect == NULL) {
4366144Srab 		(void) xkb_fail(xkb, "cannot find section .xen_p2m");
4376144Srab 		return (0);
4386144Srab 	}
4396144Srab 
4406144Srab 	if ((p2m = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
4416144Srab 		(void) xkb_fail(xkb, "couldn't read .xen_p2m");
4426144Srab 		return (0);
4436144Srab 	}
4446144Srab 
4456144Srab 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
4466144Srab 		if (p2m[i].pfn > xkb->xkb_max_pfn)
4476144Srab 			xkb->xkb_max_pfn = p2m[i].pfn;
4486144Srab 	}
4496144Srab 
4506144Srab 	size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
4516144Srab 	xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
4526144Srab 	size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
4536144Srab 	xe->xe_off = mdb_alloc(size, UM_SLEEP);
4546144Srab 
4556144Srab 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
4566144Srab 		xkb->xkb_p2m[i] = PFN_INVALID;
4576144Srab 		xe->xe_off[i] = (size_t)-1;
4586144Srab 	}
4596144Srab 
4606144Srab 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
4616144Srab 		xkb->xkb_p2m[p2m[i].pfn] = p2m[i].gmfn;
4626144Srab 		xe->xe_off[p2m[i].pfn] = i;
4636144Srab 	}
4646144Srab 
4656144Srab 	return (1);
4666144Srab }
4676144Srab 
4686144Srab /*
4697486Sjohn.levon@sun.com  * For HVM images, we don't have the corresponding MFN list; the table
4707486Sjohn.levon@sun.com  * is just a mapping from page index in the dump to the corresponding
4717486Sjohn.levon@sun.com  * PFN.  To simplify the other code, we'll pretend that these PFNs are
4727486Sjohn.levon@sun.com  * really MFNs as well, by populating xkb_p2m.
4737486Sjohn.levon@sun.com  */
4747486Sjohn.levon@sun.com static int
xkb_build_fake_p2m(xkb_t * xkb)4757486Sjohn.levon@sun.com xkb_build_fake_p2m(xkb_t *xkb)
4767486Sjohn.levon@sun.com {
4777486Sjohn.levon@sun.com 	xkb_elf_t *xe = &xkb->xkb_elf;
4787486Sjohn.levon@sun.com 	mdb_gelf_sect_t *sect;
4797486Sjohn.levon@sun.com 	size_t size;
4807486Sjohn.levon@sun.com 	size_t i;
4817486Sjohn.levon@sun.com 
4827486Sjohn.levon@sun.com 	uint64_t *p2pfn;
4837486Sjohn.levon@sun.com 
4847486Sjohn.levon@sun.com 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pfn");
4857486Sjohn.levon@sun.com 
4867486Sjohn.levon@sun.com 	if (sect == NULL) {
4877486Sjohn.levon@sun.com 		(void) xkb_fail(xkb, "cannot find section .xen_pfn");
4887486Sjohn.levon@sun.com 		return (0);
4897486Sjohn.levon@sun.com 	}
4907486Sjohn.levon@sun.com 
4917486Sjohn.levon@sun.com 	if ((p2pfn = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
4927486Sjohn.levon@sun.com 		(void) xkb_fail(xkb, "couldn't read .xen_pfn");
4937486Sjohn.levon@sun.com 		return (0);
4947486Sjohn.levon@sun.com 	}
4957486Sjohn.levon@sun.com 
4967486Sjohn.levon@sun.com 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
497*10175SStuart.Maybee@Sun.COM 		if (p2pfn[i] != PFN_INVALID && p2pfn[i] > xkb->xkb_max_pfn)
4987486Sjohn.levon@sun.com 			xkb->xkb_max_pfn = p2pfn[i];
4997486Sjohn.levon@sun.com 	}
5007486Sjohn.levon@sun.com 
5017486Sjohn.levon@sun.com 	size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
5027486Sjohn.levon@sun.com 	xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
503*10175SStuart.Maybee@Sun.COM 
5047486Sjohn.levon@sun.com 	size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
5057486Sjohn.levon@sun.com 	xe->xe_off = mdb_alloc(size, UM_SLEEP);
5067486Sjohn.levon@sun.com 
5077486Sjohn.levon@sun.com 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
5087486Sjohn.levon@sun.com 		xkb->xkb_p2m[i] = PFN_INVALID;
5097486Sjohn.levon@sun.com 		xe->xe_off[i] = (size_t)-1;
5107486Sjohn.levon@sun.com 	}
5117486Sjohn.levon@sun.com 
5127486Sjohn.levon@sun.com 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
513*10175SStuart.Maybee@Sun.COM 		if (p2pfn[i] == PFN_INVALID)
514*10175SStuart.Maybee@Sun.COM 			continue;
5157486Sjohn.levon@sun.com 		xkb->xkb_p2m[p2pfn[i]] = p2pfn[i];
5167486Sjohn.levon@sun.com 		xe->xe_off[p2pfn[i]] = i;
5177486Sjohn.levon@sun.com 	}
5187486Sjohn.levon@sun.com 
5197486Sjohn.levon@sun.com 	return (1);
5207486Sjohn.levon@sun.com }
5217486Sjohn.levon@sun.com 
5227486Sjohn.levon@sun.com /*
5235084Sjohnlev  * Return the MFN of the top-level page table for the given as.
5245084Sjohnlev  */
5255084Sjohnlev static mfn_t
xkb_as_to_mfn(xkb_t * xkb,struct as * as)5265084Sjohnlev xkb_as_to_mfn(xkb_t *xkb, struct as *as)
5275084Sjohnlev {
5285084Sjohnlev 	uintptr_t asp = (uintptr_t)as;
5295084Sjohnlev 	uintptr_t hatp;
5305084Sjohnlev 	uintptr_t htablep;
5315084Sjohnlev 	uintptr_t pfn;
5325084Sjohnlev 
5335084Sjohnlev 	if (!xkb_read_word(xkb, asp + offsetof(struct as, a_hat), &hatp))
5345084Sjohnlev 		return (MFN_INVALID);
5355084Sjohnlev 	if (!xkb_read_word(xkb, hatp + xkb->xkb_info.di_hat_htable_off,
5365084Sjohnlev 	    &htablep))
5375084Sjohnlev 		return (MFN_INVALID);
5385084Sjohnlev 	if (!xkb_read_word(xkb, htablep + xkb->xkb_info.di_ht_pfn_off,
5395084Sjohnlev 	    &pfn))
5405084Sjohnlev 		return (MFN_INVALID);
5415084Sjohnlev 
5426144Srab 	if (pfn > xkb->xkb_max_pfn)
5435084Sjohnlev 		return (MFN_INVALID);
5445084Sjohnlev 
5455084Sjohnlev 	return (xkb->xkb_p2m[pfn]);
5465084Sjohnlev }
5475084Sjohnlev 
5487486Sjohn.levon@sun.com static mfn_t
xkb_cr3_to_pfn(xkb_t * xkb)5497486Sjohn.levon@sun.com xkb_cr3_to_pfn(xkb_t *xkb)
5507486Sjohn.levon@sun.com {
551*10175SStuart.Maybee@Sun.COM 	uint64_t cr3 = xkb->xkb_vcpus[0]->ctrlreg[3];
5527486Sjohn.levon@sun.com 	if (xkb->xkb_is_hvm)
5537486Sjohn.levon@sun.com 		return (cr3 >> PAGE_SHIFT);
5547486Sjohn.levon@sun.com 	return (xen_cr3_to_pfn(cr3));
5557486Sjohn.levon@sun.com }
5567486Sjohn.levon@sun.com 
5575084Sjohnlev static ssize_t
xkb_read_helper(xkb_t * xkb,struct as * as,int phys,uint64_t addr,void * buf,size_t size)5585084Sjohnlev xkb_read_helper(xkb_t *xkb, struct as *as, int phys, uint64_t addr,
5595084Sjohnlev     void *buf, size_t size)
5605084Sjohnlev {
5615084Sjohnlev 	size_t left = size;
5626144Srab 	int windowed = (xkb->xkb_pages == NULL);
5637486Sjohn.levon@sun.com 	mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
5645084Sjohnlev 
5655084Sjohnlev 	if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
5665084Sjohnlev 		return (-1);
5675084Sjohnlev 
5685084Sjohnlev 	while (left) {
5695084Sjohnlev 		uint64_t pos = addr + (size - left);
5705084Sjohnlev 		char *outpos = (char *)buf + (size - left);
5715084Sjohnlev 		size_t pageoff = PAGE_OFFSET(pos);
5725084Sjohnlev 		size_t sz = MIN(left, PAGE_SIZE - pageoff);
5735084Sjohnlev 		mfn_t mfn;
5745084Sjohnlev 
5755084Sjohnlev 		if (!phys) {
5765084Sjohnlev 			mfn = xkb_va_to_mfn(xkb, pos, tlmfn);
5775084Sjohnlev 			if (mfn == MFN_INVALID)
5785084Sjohnlev 				return (-1);
5795084Sjohnlev 		} else {
5805084Sjohnlev 			xen_pfn_t pfn = pos >> PAGE_SHIFT;
5816144Srab 			if (pfn > xkb->xkb_max_pfn)
5825084Sjohnlev 				return (-1);
5835084Sjohnlev 			mfn = xkb->xkb_p2m[pfn];
5845084Sjohnlev 			if (mfn == MFN_INVALID)
5855084Sjohnlev 				return (-1);
5865084Sjohnlev 		}
5875084Sjohnlev 
5885084Sjohnlev 		/*
5895084Sjohnlev 		 * If we're windowed then pread() is much faster.
5905084Sjohnlev 		 */
5915084Sjohnlev 		if (windowed) {
5925084Sjohnlev 			offset_t off = xkb_mfn_to_offset(xkb, mfn);
5935084Sjohnlev 			int ret;
5945084Sjohnlev 
5955084Sjohnlev 			if (off == ~1ULL)
5965084Sjohnlev 				return (-1);
5975084Sjohnlev 
5985084Sjohnlev 			off += pageoff;
5995084Sjohnlev 
6005084Sjohnlev 			ret = pread64(xkb->xkb_fd, outpos, sz, off);
6015084Sjohnlev 			if (ret == -1)
6025084Sjohnlev 				return (-1);
6035084Sjohnlev 			if (ret != sz)
6045084Sjohnlev 				return ((size - left) + ret);
6055084Sjohnlev 
6065084Sjohnlev 			left -= ret;
6075084Sjohnlev 		} else {
6085084Sjohnlev 			if (xkb_map_mfn(xkb, mfn, &xkb->xkb_map) == NULL)
6095084Sjohnlev 				return (-1);
6105084Sjohnlev 
6115084Sjohnlev 			bcopy(xkb->xkb_map.mm_map + pageoff, outpos, sz);
6125084Sjohnlev 
6135084Sjohnlev 			left -= sz;
6145084Sjohnlev 		}
6155084Sjohnlev 	}
6165084Sjohnlev 
6175084Sjohnlev 	return (size);
6185084Sjohnlev }
6195084Sjohnlev 
6205084Sjohnlev static ssize_t
xkb_pread(xkb_t * xkb,uint64_t addr,void * buf,size_t size)6215084Sjohnlev xkb_pread(xkb_t *xkb, uint64_t addr, void *buf, size_t size)
6225084Sjohnlev {
6235084Sjohnlev 	return (xkb_read_helper(xkb, NULL, 1, addr, buf, size));
6245084Sjohnlev }
6255084Sjohnlev 
6265084Sjohnlev static ssize_t
xkb_aread(xkb_t * xkb,uintptr_t addr,void * buf,size_t size,struct as * as)6275084Sjohnlev xkb_aread(xkb_t *xkb, uintptr_t addr, void *buf, size_t size, struct as *as)
6285084Sjohnlev {
6295084Sjohnlev 	return (xkb_read_helper(xkb, as, 0, addr, buf, size));
6305084Sjohnlev }
6315084Sjohnlev 
6325084Sjohnlev static ssize_t
xkb_read(xkb_t * xkb,uintptr_t addr,void * buf,size_t size)6335084Sjohnlev xkb_read(xkb_t *xkb, uintptr_t addr, void *buf, size_t size)
6345084Sjohnlev {
6355084Sjohnlev 	return (xkb_aread(xkb, addr, buf, size, NULL));
6365084Sjohnlev }
6375084Sjohnlev 
6385084Sjohnlev static int
xkb_read_word(xkb_t * xkb,uintptr_t addr,uintptr_t * buf)6395084Sjohnlev xkb_read_word(xkb_t *xkb, uintptr_t addr, uintptr_t *buf)
6405084Sjohnlev {
6415084Sjohnlev 	if (xkb_read(xkb, addr, buf, sizeof (uintptr_t)) !=
6425084Sjohnlev 	    sizeof (uintptr_t))
6435084Sjohnlev 		return (0);
6445084Sjohnlev 	return (1);
6455084Sjohnlev }
6465084Sjohnlev 
6475084Sjohnlev static char *
xkb_readstr(xkb_t * xkb,uintptr_t addr)6485084Sjohnlev xkb_readstr(xkb_t *xkb, uintptr_t addr)
6495084Sjohnlev {
6505084Sjohnlev 	char *str = mdb_alloc(1024, UM_SLEEP);
6515084Sjohnlev 	size_t i;
6525084Sjohnlev 
6535084Sjohnlev 	for (i = 0; i < 1024; i++) {
6545084Sjohnlev 		if (xkb_read(xkb, addr + i, &str[i], 1) != 1) {
6555084Sjohnlev 			mdb_free(str, 1024);
6565084Sjohnlev 			return (NULL);
6575084Sjohnlev 		}
6585084Sjohnlev 
6595084Sjohnlev 		if (str[i] == '\0')
6605084Sjohnlev 			break;
6615084Sjohnlev 	}
6625084Sjohnlev 
6635084Sjohnlev 	if (i == 1024) {
6645084Sjohnlev 		mdb_free(str, 1024);
6655084Sjohnlev 		return (NULL);
6665084Sjohnlev 	}
6675084Sjohnlev 
6685084Sjohnlev 	return (str);
6695084Sjohnlev }
6705084Sjohnlev 
6715084Sjohnlev static offset_t
xkb_pfn_to_off(xkb_t * xkb,xen_pfn_t pfn)6726144Srab xkb_pfn_to_off(xkb_t *xkb, xen_pfn_t pfn)
6736144Srab {
6746144Srab 	if (pfn == PFN_INVALID || pfn > xkb->xkb_max_pfn)
6756144Srab 		return (-1ULL);
6766144Srab 
6776144Srab 	if (xkb->xkb_type == XKB_FORMAT_CORE)
6786144Srab 		return (PAGE_SIZE * pfn);
6796144Srab 
6806144Srab 	return (PAGE_SIZE * (xkb->xkb_elf.xe_off[pfn]));
6816144Srab }
6826144Srab 
6836144Srab static offset_t
xkb_mfn_to_offset(xkb_t * xkb,mfn_t mfn)6845084Sjohnlev xkb_mfn_to_offset(xkb_t *xkb, mfn_t mfn)
6855084Sjohnlev {
6865084Sjohnlev 	xen_pfn_t pfn;
6875084Sjohnlev 
6885084Sjohnlev 	if (mfn > xkb->xkb_max_mfn)
6895084Sjohnlev 		return (-1ULL);
6905084Sjohnlev 
6915084Sjohnlev 	pfn = xkb->xkb_m2p[mfn];
6925084Sjohnlev 
6935084Sjohnlev 	if (pfn == PFN_INVALID)
6945084Sjohnlev 		return (-1ULL);
6955084Sjohnlev 
6966144Srab 	return (xkb->xkb_pages_off + xkb_pfn_to_off(xkb, pfn));
6975084Sjohnlev }
6985084Sjohnlev 
6995084Sjohnlev static char *
xkb_map_mfn(xkb_t * xkb,mfn_t mfn,mfn_map_t * mm)7005084Sjohnlev xkb_map_mfn(xkb_t *xkb, mfn_t mfn, mfn_map_t *mm)
7015084Sjohnlev {
7026144Srab 	int windowed = (xkb->xkb_pages == NULL);
7035084Sjohnlev 	offset_t off;
7045084Sjohnlev 
7055084Sjohnlev 	if (mm->mm_mfn == mfn)
7065084Sjohnlev 		return (mm->mm_map);
7075084Sjohnlev 
7085084Sjohnlev 	mm->mm_mfn = mfn;
7095084Sjohnlev 
7105084Sjohnlev 	if (windowed) {
7115084Sjohnlev 		if (mm->mm_map != (char *)MAP_FAILED) {
7125084Sjohnlev 			(void) munmap(mm->mm_map, PAGE_SIZE);
7135084Sjohnlev 			mm->mm_map = (void *)MAP_FAILED;
7145084Sjohnlev 		}
7155084Sjohnlev 
7165084Sjohnlev 		if ((off = xkb_mfn_to_offset(xkb, mfn)) == (-1ULL))
7175084Sjohnlev 			return (NULL);
7185084Sjohnlev 
7195084Sjohnlev 		mm->mm_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED,
7205084Sjohnlev 		    xkb->xkb_fd, off);
7215084Sjohnlev 
7225084Sjohnlev 		if (mm->mm_map == (char *)MAP_FAILED)
7235084Sjohnlev 			return (NULL);
7245084Sjohnlev 	} else {
7255084Sjohnlev 		xen_pfn_t pfn;
7265084Sjohnlev 
7275084Sjohnlev 		mm->mm_map = NULL;
7285084Sjohnlev 
7295084Sjohnlev 		if (mfn > xkb->xkb_max_mfn)
7305084Sjohnlev 			return (NULL);
7315084Sjohnlev 
7325084Sjohnlev 		pfn = xkb->xkb_m2p[mfn];
7335084Sjohnlev 
7345084Sjohnlev 		if (pfn == PFN_INVALID)
7355084Sjohnlev 			return (NULL);
7365084Sjohnlev 
7376144Srab 		mm->mm_map = xkb->xkb_pages + xkb_pfn_to_off(xkb, pfn);
7385084Sjohnlev 	}
7395084Sjohnlev 
7405084Sjohnlev 	return (mm->mm_map);
7415084Sjohnlev }
7425084Sjohnlev 
7437486Sjohn.levon@sun.com static uint64_t
xkb_get_pte(mmu_info_t * mmu,char * ptep)7447486Sjohn.levon@sun.com xkb_get_pte(mmu_info_t *mmu, char *ptep)
7455084Sjohnlev {
7466144Srab 	uint64_t pte = 0;
7475084Sjohnlev 
7486144Srab 	if (mmu->mi_ptesize == 8) {
7496144Srab 		/* LINTED - alignment */
7506144Srab 		pte = *((uint64_t *)ptep);
7516144Srab 	} else {
7525084Sjohnlev 		/* LINTED - alignment */
7535084Sjohnlev 		pte = *((uint32_t *)ptep);
7545084Sjohnlev 	}
7555084Sjohnlev 
7567486Sjohn.levon@sun.com 	return (pte);
7577486Sjohn.levon@sun.com }
7585084Sjohnlev 
7597486Sjohn.levon@sun.com static mfn_t
xkb_pte_to_base_mfn(uint64_t pte,size_t level)7607486Sjohn.levon@sun.com xkb_pte_to_base_mfn(uint64_t pte, size_t level)
7617486Sjohn.levon@sun.com {
7627486Sjohn.levon@sun.com 	if (PTE_IS_LGPG(pte, level)) {
7637486Sjohn.levon@sun.com 		pte &= PT_PADDR_LGPG;
7647486Sjohn.levon@sun.com 	} else {
7657486Sjohn.levon@sun.com 		pte &= PT_PADDR;
7667486Sjohn.levon@sun.com 	}
7675084Sjohnlev 
7685084Sjohnlev 	return (pte >> PAGE_SHIFT);
7695084Sjohnlev }
7705084Sjohnlev 
7715084Sjohnlev /*
7725084Sjohnlev  * Resolve the given VA into an MFN, using the provided mfn as a top-level page
7735084Sjohnlev  * table.
7745084Sjohnlev  */
7755084Sjohnlev static mfn_t
xkb_va_to_mfn(xkb_t * xkb,uintptr_t va,mfn_t mfn)7765084Sjohnlev xkb_va_to_mfn(xkb_t *xkb, uintptr_t va, mfn_t mfn)
7775084Sjohnlev {
7785084Sjohnlev 	mmu_info_t *mmu = &xkb->xkb_mmu;
7797486Sjohn.levon@sun.com 	uint64_t pte;
7805084Sjohnlev 	size_t level;
7815084Sjohnlev 
7825084Sjohnlev 	for (level = mmu->mi_max; ; --level) {
7835084Sjohnlev 		size_t entry;
7845084Sjohnlev 
7855084Sjohnlev 		if (xkb_map_mfn(xkb, mfn, &xkb->xkb_pt_map[level]) == NULL)
7865084Sjohnlev 			return (MFN_INVALID);
7875084Sjohnlev 
7885084Sjohnlev 		entry = (va >> mmu->mi_shift[level]) & (mmu->mi_ptes - 1);
7895084Sjohnlev 
7907486Sjohn.levon@sun.com 		pte = xkb_get_pte(mmu, (char *)xkb->xkb_pt_map[level].mm_map +
7917486Sjohn.levon@sun.com 		    entry * mmu->mi_ptesize);
7925084Sjohnlev 
7937486Sjohn.levon@sun.com 		if ((mfn = xkb_pte_to_base_mfn(pte, level)) == MFN_INVALID)
7945084Sjohnlev 			return (MFN_INVALID);
7955084Sjohnlev 
7965084Sjohnlev 		if (level == 0)
7975084Sjohnlev 			break;
7987486Sjohn.levon@sun.com 
7997486Sjohn.levon@sun.com 		/*
8007486Sjohn.levon@sun.com 		 * Currently 'mfn' refers to the base MFN of the
8017486Sjohn.levon@sun.com 		 * large-page mapping.  Add on the 4K-sized index into
8027486Sjohn.levon@sun.com 		 * the large-page mapping to get the right MFN within
8037486Sjohn.levon@sun.com 		 * the mapping.
8047486Sjohn.levon@sun.com 		 */
8057486Sjohn.levon@sun.com 		if (PTE_IS_LGPG(pte, level)) {
8067486Sjohn.levon@sun.com 			mfn += (va & ((1 << mmu->mi_shift[level]) - 1)) >>
8077486Sjohn.levon@sun.com 			    PAGE_SHIFT;
8087486Sjohn.levon@sun.com 			break;
8097486Sjohn.levon@sun.com 		}
8105084Sjohnlev 	}
8115084Sjohnlev 
8125084Sjohnlev 	return (mfn);
8135084Sjohnlev }
8145084Sjohnlev 
8155084Sjohnlev static int
xkb_read_module(xkb_t * xkb,uintptr_t modulep,struct module * module,uintptr_t * sym_addr,uintptr_t * sym_count,uintptr_t * str_addr)8165084Sjohnlev xkb_read_module(xkb_t *xkb, uintptr_t modulep, struct module *module,
8175084Sjohnlev     uintptr_t *sym_addr, uintptr_t *sym_count, uintptr_t *str_addr)
8185084Sjohnlev {
8195084Sjohnlev 	if (xkb_read(xkb, modulep, module, sizeof (struct module)) !=
8205084Sjohnlev 	    sizeof (struct module))
8215084Sjohnlev 		return (0);
8225084Sjohnlev 
8235084Sjohnlev 	if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
8245084Sjohnlev 	    offsetof(Shdr, sh_addr), sym_addr))
8255084Sjohnlev 		return (0);
8265084Sjohnlev 
8275084Sjohnlev 	if (!xkb_read_word(xkb, (uintptr_t)module->strhdr +
8285084Sjohnlev 	    offsetof(Shdr, sh_addr), str_addr))
8295084Sjohnlev 		return (0);
8305084Sjohnlev 
8315084Sjohnlev 	if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
8325084Sjohnlev 	    offsetof(Shdr, sh_size), sym_count))
8335084Sjohnlev 		return (0);
8345084Sjohnlev 	*sym_count /= sizeof (Sym);
8355084Sjohnlev 
8365084Sjohnlev 	return (1);
8375084Sjohnlev }
8385084Sjohnlev 
8395084Sjohnlev static int
xkb_read_modsyms(xkb_t * xkb,char ** buf,size_t * sizes,int types,uintptr_t sym_addr,uintptr_t str_addr,uintptr_t sym_count)8405084Sjohnlev xkb_read_modsyms(xkb_t *xkb, char **buf, size_t *sizes, int types,
8415084Sjohnlev     uintptr_t sym_addr, uintptr_t str_addr, uintptr_t sym_count)
8425084Sjohnlev {
8435084Sjohnlev 	size_t i;
8445084Sjohnlev 
8455084Sjohnlev 	for (i = 0; i < sym_count; i++) {
8465084Sjohnlev 		Sym sym;
8475084Sjohnlev 		char *name;
8485084Sjohnlev 		size_t sz;
8495084Sjohnlev 		int type = XKB_WALK_GLOBAL;
8505084Sjohnlev 
8515084Sjohnlev 		if (xkb_read(xkb, sym_addr + i * sizeof (sym), &sym,
8525084Sjohnlev 		    sizeof (sym)) != sizeof (sym))
8535084Sjohnlev 			return (0);
8545084Sjohnlev 
8555084Sjohnlev 		if (GELF_ST_BIND(sym.st_info) == STB_LOCAL)
8565084Sjohnlev 			type = XKB_WALK_LOCAL;
8575084Sjohnlev 
8585084Sjohnlev 		name = xkb_readstr(xkb, str_addr + sym.st_name);
8595084Sjohnlev 
8605084Sjohnlev 		sym.st_shndx = SHN_ABS;
8615084Sjohnlev 		sym.st_name = sizes[XKB_WALK_STR];
8625084Sjohnlev 
8635084Sjohnlev 		sizes[type] += sizeof (sym);
8645084Sjohnlev 		sz = strlen(name) + 1;
8655084Sjohnlev 		sizes[XKB_WALK_STR] += sz;
8665084Sjohnlev 
8675084Sjohnlev 		if (buf != NULL) {
8685084Sjohnlev 			if (types & type) {
8695084Sjohnlev 				bcopy(&sym, *buf, sizeof (sym));
8705084Sjohnlev 				*buf += sizeof (sym);
8715084Sjohnlev 			}
8725084Sjohnlev 			if (types & XKB_WALK_STR) {
8735084Sjohnlev 				bcopy(name, *buf, sz);
8745084Sjohnlev 				*buf += sz;
8755084Sjohnlev 			}
8765084Sjohnlev 		}
8775084Sjohnlev 
8785084Sjohnlev 		mdb_free(name, 1024);
8795084Sjohnlev 	}
8805084Sjohnlev 
8815084Sjohnlev 	return (1);
8825084Sjohnlev }
8835084Sjohnlev 
8845084Sjohnlev static int
xkb_walk_syms(xkb_t * xkb,uintptr_t modhead,char ** buf,size_t * sizes,int types)8855084Sjohnlev xkb_walk_syms(xkb_t *xkb, uintptr_t modhead, char **buf,
8865084Sjohnlev     size_t *sizes, int types)
8875084Sjohnlev {
8885084Sjohnlev 	uintptr_t modctl = modhead;
8895084Sjohnlev 	uintptr_t modulep;
8905084Sjohnlev 	struct module module;
8915084Sjohnlev 	uintptr_t sym_count;
8925084Sjohnlev 	uintptr_t sym_addr;
8935084Sjohnlev 	uintptr_t str_addr;
8945084Sjohnlev 	size_t max_iter = 500;
8955084Sjohnlev 
8965084Sjohnlev 	bzero(sizes, sizeof (*sizes) * (XKB_WALK_STR + 1));
8975084Sjohnlev 
8985084Sjohnlev 	/*
8995084Sjohnlev 	 * empty first symbol
9005084Sjohnlev 	 */
9015084Sjohnlev 	sizes[XKB_WALK_LOCAL] += sizeof (Sym);
9025084Sjohnlev 	sizes[XKB_WALK_STR] += 1;
9035084Sjohnlev 
9045084Sjohnlev 	if (buf != NULL) {
9055084Sjohnlev 		if (types & XKB_WALK_LOCAL) {
9065084Sjohnlev 			Sym tmp;
9075084Sjohnlev 			bzero(&tmp, sizeof (tmp));
9085084Sjohnlev 			bcopy(&tmp, *buf, sizeof (tmp));
9095084Sjohnlev 			*buf += sizeof (tmp);
9105084Sjohnlev 		}
9115084Sjohnlev 		if (types & XKB_WALK_STR) {
9125084Sjohnlev 			**buf = '\0';
9135084Sjohnlev 			(*buf)++;
9145084Sjohnlev 		}
9155084Sjohnlev 	}
9165084Sjohnlev 
9175084Sjohnlev 	for (;;) {
9185084Sjohnlev 		if (!xkb_read_word(xkb,
9195084Sjohnlev 		    modctl + offsetof(struct modctl, mod_mp), &modulep))
9205084Sjohnlev 			return (0);
9215084Sjohnlev 
9225084Sjohnlev 		if (modulep == NULL)
9235084Sjohnlev 			goto next;
9245084Sjohnlev 
9255084Sjohnlev 		if (!xkb_read_module(xkb, modulep, &module, &sym_addr,
9265084Sjohnlev 		    &sym_count, &str_addr))
9275084Sjohnlev 			return (0);
9285084Sjohnlev 
9295084Sjohnlev 		if ((module.flags & KOBJ_NOKSYMS))
9305084Sjohnlev 			goto next;
9315084Sjohnlev 
9325084Sjohnlev 		if (!xkb_read_modsyms(xkb, buf, sizes, types, sym_addr,
9335084Sjohnlev 		    str_addr, sym_count))
9345084Sjohnlev 			return (0);
9355084Sjohnlev 
9365084Sjohnlev next:
9375084Sjohnlev 		if (!xkb_read_word(xkb,
9385084Sjohnlev 		    modctl + offsetof(struct modctl, mod_next), &modctl))
9395084Sjohnlev 			return (0);
9405084Sjohnlev 
9415084Sjohnlev 		if (modctl == modhead)
9425084Sjohnlev 			break;
9435084Sjohnlev 		/*
9445084Sjohnlev 		 * Try and prevent us looping forever if we have a broken list.
9455084Sjohnlev 		 */
9465084Sjohnlev 		if (--max_iter == 0)
9475084Sjohnlev 			break;
9485084Sjohnlev 	}
9495084Sjohnlev 
9505084Sjohnlev 	return (1);
9515084Sjohnlev }
9525084Sjohnlev 
9535084Sjohnlev /*
9545084Sjohnlev  * Userspace equivalent of ksyms_snapshot().  Since we don't have a namelist
9555084Sjohnlev  * file for hypervisor images, we fabricate one here using code similar
9565084Sjohnlev  * to that of /dev/ksyms.
9575084Sjohnlev  */
9585084Sjohnlev static int
xkb_build_ksyms(xkb_t * xkb)9595084Sjohnlev xkb_build_ksyms(xkb_t *xkb)
9605084Sjohnlev {
9615084Sjohnlev 	debug_info_t *info = &xkb->xkb_info;
9625084Sjohnlev 	size_t sizes[XKB_WALK_STR + 1];
9635084Sjohnlev 	xkb_namelist_t *hdr;
9645084Sjohnlev 	char *buf;
9655084Sjohnlev 	struct modctl modules;
9665084Sjohnlev 	uintptr_t module;
9675084Sjohnlev 	Shdr *shp;
9685084Sjohnlev 
9695084Sjohnlev 	if (xkb_read(xkb, info->di_modules, &modules,
9705084Sjohnlev 	    sizeof (struct modctl)) != sizeof (struct modctl))
9715084Sjohnlev 		return (0);
9725084Sjohnlev 
9735084Sjohnlev 	module = (uintptr_t)modules.mod_mp;
9745084Sjohnlev 
9755084Sjohnlev 	if (!xkb_walk_syms(xkb, info->di_modules, NULL, sizes,
9765084Sjohnlev 	    XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR))
9775084Sjohnlev 		return (0);
9785084Sjohnlev 
9795084Sjohnlev 	xkb->xkb_namesize = sizeof (xkb_namelist_t);
9805084Sjohnlev 	xkb->xkb_namesize += sizes[XKB_WALK_LOCAL];
9815084Sjohnlev 	xkb->xkb_namesize += sizes[XKB_WALK_GLOBAL];
9825084Sjohnlev 	xkb->xkb_namesize += sizes[XKB_WALK_STR];
9835084Sjohnlev 
9845084Sjohnlev 	if ((xkb->xkb_namelist = mdb_zalloc(xkb->xkb_namesize, UM_SLEEP))
9855084Sjohnlev 	    == NULL)
9865084Sjohnlev 		return (0);
9875084Sjohnlev 
9885084Sjohnlev 	/* LINTED - alignment */
9895084Sjohnlev 	hdr = (xkb_namelist_t *)xkb->xkb_namelist;
9905084Sjohnlev 
9915084Sjohnlev 	if (xkb_read(xkb, module + offsetof(struct module, hdr),
9925084Sjohnlev 	    &hdr->kh_elf_hdr, sizeof (Ehdr)) != sizeof (Ehdr))
9935084Sjohnlev 		return (0);
9945084Sjohnlev 
9955084Sjohnlev 	hdr->kh_elf_hdr.e_phoff = offsetof(xkb_namelist_t, kh_text_phdr);
9965084Sjohnlev 	hdr->kh_elf_hdr.e_shoff = offsetof(xkb_namelist_t, kh_shdr);
9975084Sjohnlev 	hdr->kh_elf_hdr.e_phnum = 2;
9985084Sjohnlev 	hdr->kh_elf_hdr.e_shnum = XKB_SHDR_NUM;
9995084Sjohnlev 	hdr->kh_elf_hdr.e_shstrndx = XKB_SHDR_SHSTRTAB;
10005084Sjohnlev 
10015084Sjohnlev 	hdr->kh_text_phdr.p_type = PT_LOAD;
10025084Sjohnlev 	hdr->kh_text_phdr.p_vaddr = (Addr)info->di_s_text;
10035084Sjohnlev 	hdr->kh_text_phdr.p_memsz = (Word)(info->di_e_text - info->di_s_text);
10045084Sjohnlev 	hdr->kh_text_phdr.p_flags = PF_R | PF_X;
10055084Sjohnlev 
10065084Sjohnlev 	hdr->kh_data_phdr.p_type = PT_LOAD;
10075084Sjohnlev 	hdr->kh_data_phdr.p_vaddr = (Addr)info->di_s_data;
10085084Sjohnlev 	hdr->kh_data_phdr.p_memsz = (Word)(info->di_e_data - info->di_s_data);
10095084Sjohnlev 	hdr->kh_data_phdr.p_flags = PF_R | PF_W | PF_X;
10105084Sjohnlev 
10115084Sjohnlev 	shp = &hdr->kh_shdr[XKB_SHDR_SYMTAB];
10125084Sjohnlev 	shp->sh_name = 1;	/* xkb_shstrtab[1] = ".symtab" */
10135084Sjohnlev 	shp->sh_type = SHT_SYMTAB;
10145084Sjohnlev 	shp->sh_offset = sizeof (xkb_namelist_t);
10155084Sjohnlev 	shp->sh_size = sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
10165084Sjohnlev 	shp->sh_link = XKB_SHDR_STRTAB;
10175084Sjohnlev 	shp->sh_info = sizes[XKB_WALK_LOCAL] / sizeof (Sym);
10185084Sjohnlev 	shp->sh_addralign = sizeof (Addr);
10195084Sjohnlev 	shp->sh_entsize = sizeof (Sym);
10205084Sjohnlev 	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);
10215084Sjohnlev 
10225084Sjohnlev 
10235084Sjohnlev 	shp = &hdr->kh_shdr[XKB_SHDR_STRTAB];
10245084Sjohnlev 	shp->sh_name = 9;	/* xkb_shstrtab[9] = ".strtab" */
10255084Sjohnlev 	shp->sh_type = SHT_STRTAB;
10265084Sjohnlev 	shp->sh_offset = sizeof (xkb_namelist_t) +
10275084Sjohnlev 	    sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
10285084Sjohnlev 	shp->sh_size = sizes[XKB_WALK_STR];
10295084Sjohnlev 	shp->sh_addralign = 1;
10305084Sjohnlev 	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);
10315084Sjohnlev 
10325084Sjohnlev 
10335084Sjohnlev 	shp = &hdr->kh_shdr[XKB_SHDR_SHSTRTAB];
10345084Sjohnlev 	shp->sh_name = 17;	/* xkb_shstrtab[17] = ".shstrtab" */
10355084Sjohnlev 	shp->sh_type = SHT_STRTAB;
10365084Sjohnlev 	shp->sh_offset = offsetof(xkb_namelist_t, shstrings);
10375084Sjohnlev 	shp->sh_size = sizeof (xkb_shstrtab);
10385084Sjohnlev 	shp->sh_addralign = 1;
10395084Sjohnlev 	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);
10405084Sjohnlev 
10415084Sjohnlev 	bcopy(xkb_shstrtab, hdr->shstrings, sizeof (xkb_shstrtab));
10425084Sjohnlev 
10435084Sjohnlev 	buf = xkb->xkb_namelist + sizeof (xkb_namelist_t);
10445084Sjohnlev 
10455084Sjohnlev 	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
10465084Sjohnlev 	    XKB_WALK_LOCAL))
10475084Sjohnlev 		return (0);
10485084Sjohnlev 	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
10495084Sjohnlev 	    XKB_WALK_GLOBAL))
10505084Sjohnlev 		return (0);
10515084Sjohnlev 	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
10525084Sjohnlev 	    XKB_WALK_STR))
10535084Sjohnlev 		return (0);
10545084Sjohnlev 
10555084Sjohnlev 	return (1);
10565084Sjohnlev }
10575084Sjohnlev 
10586144Srab static xkb_t *
xkb_open_core(xkb_t * xkb)10596144Srab xkb_open_core(xkb_t *xkb)
10606144Srab {
10616144Srab 	xkb_core_t *xc = &xkb->xkb_core;
10626144Srab 	size_t sz;
1063*10175SStuart.Maybee@Sun.COM 	int i;
1064*10175SStuart.Maybee@Sun.COM 	struct vcpu_guest_context *vcp;
10656144Srab 
10666144Srab 	xkb->xkb_type = XKB_FORMAT_CORE;
10676144Srab 
10686144Srab 	if ((xkb->xkb_fd = open64(xkb->xkb_path, O_RDONLY)) == -1)
10696144Srab 		return (xkb_fail(xkb, "cannot open %s", xkb->xkb_path));
10706144Srab 
10716144Srab 	if (pread64(xkb->xkb_fd, &xc->xc_hdr, sizeof (xc->xc_hdr), 0) !=
10726144Srab 	    sizeof (xc->xc_hdr))
10736144Srab 		return (xkb_fail(xkb, "invalid dump file"));
10746144Srab 
10756144Srab 	if (xc->xc_hdr.xch_magic == XC_CORE_MAGIC_HVM)
10766144Srab 		return (xkb_fail(xkb, "cannot process HVM images"));
10776144Srab 
10786144Srab 	if (xc->xc_hdr.xch_magic != XC_CORE_MAGIC) {
10796144Srab 		return (xkb_fail(xkb, "invalid magic %d",
10806144Srab 		    xc->xc_hdr.xch_magic));
10816144Srab 	}
10826144Srab 
10836144Srab 	/*
10846144Srab 	 * With FORMAT_CORE, all pages are in the dump (non-existing
10856144Srab 	 * ones are zeroed out).
10866144Srab 	 */
10876144Srab 	xkb->xkb_nr_pages = xc->xc_hdr.xch_nr_pages;
10886144Srab 	xkb->xkb_pages_off = xc->xc_hdr.xch_pages_offset;
10896144Srab 	xkb->xkb_max_pfn = xc->xc_hdr.xch_nr_pages - 1;
10906144Srab 	xkb->xkb_nr_vcpus = xc->xc_hdr.xch_nr_vcpus;
10916144Srab 
1092*10175SStuart.Maybee@Sun.COM 	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context);
1093*10175SStuart.Maybee@Sun.COM 	xkb->xkb_vcpu_data_sz = sz;
1094*10175SStuart.Maybee@Sun.COM 	xkb->xkb_vcpu_data = mdb_alloc(sz, UM_SLEEP);
10956144Srab 
1096*10175SStuart.Maybee@Sun.COM 	if (pread64(xkb->xkb_fd, xkb->xkb_vcpu_data, sz,
1097*10175SStuart.Maybee@Sun.COM 	    xc->xc_hdr.xch_ctxt_offset) != sz)
1098*10175SStuart.Maybee@Sun.COM 		return (xkb_fail(xkb, "cannot read VCPU contexts"));
1099*10175SStuart.Maybee@Sun.COM 
1100*10175SStuart.Maybee@Sun.COM 	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context *);
11016144Srab 	xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);
11026144Srab 
1103*10175SStuart.Maybee@Sun.COM 	vcp = xkb->xkb_vcpu_data;
1104*10175SStuart.Maybee@Sun.COM 	for (i = 0; i < xkb->xkb_nr_vcpus; i++)
1105*10175SStuart.Maybee@Sun.COM 		xkb->xkb_vcpus[i] = &vcp[i];
11066144Srab 
11076144Srab 	/*
11086144Srab 	 * Try to map all the data pages. If we can't, fall back to the
11096144Srab 	 * window/pread() approach, which is significantly slower.
11106144Srab 	 */
11116144Srab 	xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
11126144Srab 	    PROT_READ, MAP_SHARED, xkb->xkb_fd, xc->xc_hdr.xch_pages_offset);
11136144Srab 
11146144Srab 	if (xkb->xkb_pages == (char *)MAP_FAILED)
11156144Srab 		xkb->xkb_pages = NULL;
11166144Srab 
11176144Srab 	/*
11186144Srab 	 * We'd like to adapt for correctness' sake, but we have no way of
11196144Srab 	 * detecting a PAE guest, since cr4 writes are disallowed.
11206144Srab 	 */
11216144Srab 	xkb->xkb_is_pae = 1;
11226144Srab 
11236144Srab 	if (!xkb_map_p2m(xkb))
11246144Srab 		return (NULL);
11256144Srab 
11266144Srab 	return (xkb);
11276144Srab }
11286144Srab 
11296144Srab static xkb_t *
xkb_open_elf(xkb_t * xkb)11306144Srab xkb_open_elf(xkb_t *xkb)
11316144Srab {
11326144Srab 	xkb_elf_t *xe = &xkb->xkb_elf;
11336144Srab 	mdb_gelf_sect_t *sect;
11346144Srab 	char *notes;
11356144Srab 	char *pos;
11366144Srab 	mdb_io_t *io;
1137*10175SStuart.Maybee@Sun.COM 	size_t sz;
1138*10175SStuart.Maybee@Sun.COM 	int i;
1139*10175SStuart.Maybee@Sun.COM 	void *dp;
11406144Srab 
11416144Srab 	if ((io = mdb_fdio_create_path(NULL, xkb->xkb_path,
11426144Srab 	    O_RDONLY, 0)) == NULL)
11436144Srab 		return (xkb_fail(xkb, "failed to open"));
11446144Srab 
11456144Srab 	xe->xe_gelf = mdb_gelf_create(io, ET_NONE, GF_FILE);
11466144Srab 
11476144Srab 	if (xe->xe_gelf == NULL) {
11486144Srab 		mdb_io_destroy(io);
11496144Srab 		return (xkb);
11506144Srab 	}
11516144Srab 
11526144Srab 	xkb->xkb_fd = mdb_fdio_fileno(io);
11536144Srab 
11546144Srab 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".note.Xen");
11556144Srab 
11566144Srab 	if (sect == NULL)
11576144Srab 		return (xkb);
11586144Srab 
11596144Srab 	if ((notes = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL)
11606144Srab 		return (xkb);
11616144Srab 
11626144Srab 	/*
11636144Srab 	 * Now we know this is indeed a hypervisor core dump, even if
11646144Srab 	 * it's corrupted.
11656144Srab 	 */
11666144Srab 	xkb->xkb_type = XKB_FORMAT_ELF;
11676144Srab 
11686144Srab 	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
11696144Srab 		/* LINTED - alignment */
11706144Srab 		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
11716144Srab 		uint64_t vers;
11726144Srab 		char *desc;
11736144Srab 		char *name;
11746144Srab 
11756144Srab 		name = pos + sizeof (*nhdr);
11766144Srab 		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);
11776144Srab 
11786144Srab 		pos = desc + nhdr->n_descsz;
11796144Srab 
11806144Srab 		switch (nhdr->n_type) {
11816144Srab 		case XEN_ELFNOTE_DUMPCORE_NONE:
11826144Srab 			break;
11836144Srab 
11846144Srab 		case XEN_ELFNOTE_DUMPCORE_HEADER:
11856144Srab 			if (nhdr->n_descsz != sizeof (struct xc_elf_header)) {
11866144Srab 				return (xkb_fail(xkb, "invalid ELF note "
11876144Srab 				    "XEN_ELFNOTE_DUMPCORE_HEADER\n"));
11886144Srab 			}
11896144Srab 
11906144Srab 			bcopy(desc, &xe->xe_hdr,
11916144Srab 			    sizeof (struct xc_elf_header));
11926144Srab 			break;
11936144Srab 
11946144Srab 		case XEN_ELFNOTE_DUMPCORE_XEN_VERSION:
1195*10175SStuart.Maybee@Sun.COM 			if (nhdr->n_descsz < sizeof (struct xc_elf_version)) {
11966144Srab 				return (xkb_fail(xkb, "invalid ELF note "
11976144Srab 				    "XEN_ELFNOTE_DUMPCORE_XEN_VERSION\n"));
11986144Srab 			}
11996144Srab 
12006144Srab 			bcopy(desc, &xe->xe_version,
12016144Srab 			    sizeof (struct xc_elf_version));
12026144Srab 			break;
12036144Srab 
12046144Srab 		case XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION:
12056144Srab 			/* LINTED - alignment */
12066144Srab 			vers = *((uint64_t *)desc);
12076144Srab 			if ((vers >> 32) != 0) {
12086144Srab 				return (xkb_fail(xkb, "unknown major "
12096144Srab 				    "version %d (expected 0)\n",
12106144Srab 				    (int)(vers >> 32)));
12116144Srab 			}
12126144Srab 
12136144Srab 			if ((vers & 0xffffffff) != 1) {
12146144Srab 				mdb_warn("unexpected dump minor number "
12156144Srab 				    "version %d (expected 1)\n",
12166144Srab 				    (int)(vers & 0xffffffff));
12176144Srab 			}
12186144Srab 			break;
12196144Srab 
12206144Srab 		default:
12216144Srab 			mdb_warn("unknown ELF note %d(%s)\n",
12226144Srab 			    nhdr->n_type, name);
12236144Srab 			break;
12246144Srab 		}
12256144Srab 	}
12266144Srab 
12277486Sjohn.levon@sun.com 	xkb->xkb_is_hvm = xe->xe_hdr.xeh_magic == XC_CORE_MAGIC_HVM;
12286144Srab 
12297486Sjohn.levon@sun.com 	if (xe->xe_hdr.xeh_magic != XC_CORE_MAGIC &&
12307486Sjohn.levon@sun.com 	    xe->xe_hdr.xeh_magic != XC_CORE_MAGIC_HVM) {
12316144Srab 		return (xkb_fail(xkb, "invalid magic %d",
12326144Srab 		    xe->xe_hdr.xeh_magic));
12336144Srab 	}
12346144Srab 
12356144Srab 	xkb->xkb_nr_pages = xe->xe_hdr.xeh_nr_pages;
12366144Srab 	xkb->xkb_is_pae = (strstr(xe->xe_version.xev_capabilities,
12376144Srab 	    "x86_32p") != NULL);
12386144Srab 
12396144Srab 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_prstatus");
12406144Srab 
12416144Srab 	if (sect == NULL)
12426144Srab 		return (xkb_fail(xkb, "cannot find section .xen_prstatus"));
12436144Srab 
1244*10175SStuart.Maybee@Sun.COM 	if (sect->gs_shdr.sh_entsize < sizeof (vcpu_guest_context_t))
12456144Srab 		return (xkb_fail(xkb, "invalid section .xen_prstatus"));
12466144Srab 
12476144Srab 	xkb->xkb_nr_vcpus = sect->gs_shdr.sh_size / sect->gs_shdr.sh_entsize;
12486144Srab 
1249*10175SStuart.Maybee@Sun.COM 	xkb->xkb_vcpu_data = mdb_gelf_sect_load(xe->xe_gelf, sect);
1250*10175SStuart.Maybee@Sun.COM 	if (xkb->xkb_vcpu_data == NULL)
12516144Srab 		return (xkb_fail(xkb, "cannot load section .xen_prstatus"));
1252*10175SStuart.Maybee@Sun.COM 	xkb->xkb_vcpu_data_sz = sect->gs_shdr.sh_size;
1253*10175SStuart.Maybee@Sun.COM 
1254*10175SStuart.Maybee@Sun.COM 	/*
1255*10175SStuart.Maybee@Sun.COM 	 * The vcpu_guest_context structures saved in the core file
1256*10175SStuart.Maybee@Sun.COM 	 * are actually unions of the 64-bit and 32-bit versions.
1257*10175SStuart.Maybee@Sun.COM 	 * Don't rely on the entry size to match the size of
1258*10175SStuart.Maybee@Sun.COM 	 * the structure, but set up an array of pointers.
1259*10175SStuart.Maybee@Sun.COM 	 */
1260*10175SStuart.Maybee@Sun.COM 	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context *);
1261*10175SStuart.Maybee@Sun.COM 	xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);
1262*10175SStuart.Maybee@Sun.COM 	for (i = 0; i < xkb->xkb_nr_vcpus; i++) {
1263*10175SStuart.Maybee@Sun.COM 		dp = ((char *)xkb->xkb_vcpu_data +
1264*10175SStuart.Maybee@Sun.COM 		    i * sect->gs_shdr.sh_entsize);
1265*10175SStuart.Maybee@Sun.COM 		xkb->xkb_vcpus[i] = dp;
1266*10175SStuart.Maybee@Sun.COM 	}
12676144Srab 
12686144Srab 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pages");
12696144Srab 
12706144Srab 	if (sect == NULL)
12716144Srab 		return (xkb_fail(xkb, "cannot find section .xen_pages"));
12726144Srab 
12736144Srab 	if (!PAGE_ALIGNED(sect->gs_shdr.sh_offset))
12746144Srab 		return (xkb_fail(xkb, ".xen_pages is not page aligned"));
12756144Srab 
12766144Srab 	if (sect->gs_shdr.sh_entsize != PAGE_SIZE)
12776144Srab 		return (xkb_fail(xkb, "invalid section .xen_pages"));
12786144Srab 
12796144Srab 	xkb->xkb_pages_off = sect->gs_shdr.sh_offset;
12806144Srab 
12816144Srab 	/*
12826144Srab 	 * Try to map all the data pages. If we can't, fall back to the
12836144Srab 	 * window/pread() approach, which is significantly slower.
12846144Srab 	 */
12856144Srab 	xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
12866144Srab 	    PROT_READ, MAP_SHARED, xkb->xkb_fd, xkb->xkb_pages_off);
12876144Srab 
12886144Srab 	if (xkb->xkb_pages == (char *)MAP_FAILED)
12896144Srab 		xkb->xkb_pages = NULL;
12906144Srab 
12917486Sjohn.levon@sun.com 	if (xkb->xkb_is_hvm) {
12927486Sjohn.levon@sun.com 		if (!xkb_build_fake_p2m(xkb))
12937486Sjohn.levon@sun.com 			return (NULL);
12947486Sjohn.levon@sun.com 	} else {
12957486Sjohn.levon@sun.com 		if (!xkb_build_p2m(xkb))
12967486Sjohn.levon@sun.com 			return (NULL);
12977486Sjohn.levon@sun.com 	}
12986144Srab 
12996144Srab 	return (xkb);
13006144Srab }
13016144Srab 
13026144Srab static void
xkb_init_mmu(xkb_t * xkb)13036144Srab xkb_init_mmu(xkb_t *xkb)
13046144Srab {
13056144Srab #if defined(__amd64)
13066144Srab 	xkb->xkb_mmu.mi_max = 3;
13076144Srab 	xkb->xkb_mmu.mi_shift[0] = 12;
13086144Srab 	xkb->xkb_mmu.mi_shift[1] = 21;
13096144Srab 	xkb->xkb_mmu.mi_shift[2] = 30;
13106144Srab 	xkb->xkb_mmu.mi_shift[3] = 39;
13116144Srab 	xkb->xkb_mmu.mi_ptes = 512;
13126144Srab 	xkb->xkb_mmu.mi_ptesize = 8;
13136144Srab #elif defined(__i386)
13146144Srab 	if (xkb->xkb_is_pae) {
13156144Srab 		xkb->xkb_mmu.mi_max = 2;
13166144Srab 		xkb->xkb_mmu.mi_shift[0] = 12;
13176144Srab 		xkb->xkb_mmu.mi_shift[1] = 21;
13186144Srab 		xkb->xkb_mmu.mi_shift[2] = 30;
13196144Srab 		xkb->xkb_mmu.mi_ptes = 512;
13206144Srab 		xkb->xkb_mmu.mi_ptesize = 8;
13216144Srab 	} else {
13226144Srab 		xkb->xkb_mmu.mi_max = 1;
13236144Srab 		xkb->xkb_mmu.mi_shift[0] = 12;
13246144Srab 		xkb->xkb_mmu.mi_shift[1] = 22;
13256144Srab 		xkb->xkb_mmu.mi_ptes = 1024;
13266144Srab 		xkb->xkb_mmu.mi_ptesize = 4;
13276144Srab 	}
13286144Srab #endif
13296144Srab }
13306144Srab 
13315084Sjohnlev /*ARGSUSED*/
13325084Sjohnlev xkb_t *
xkb_open(const char * namelist,const char * corefile,const char * swapfile,int flag,const char * err)13335084Sjohnlev xkb_open(const char *namelist, const char *corefile, const char *swapfile,
13345084Sjohnlev     int flag, const char *err)
13355084Sjohnlev {
13367486Sjohn.levon@sun.com 	uintptr_t debug_info = DEBUG_INFO;
13375084Sjohnlev 	struct stat64 corestat;
13386144Srab 	xkb_t *xkb = NULL;
13395084Sjohnlev 	size_t i;
13405084Sjohnlev 
13415084Sjohnlev 	if (stat64(corefile, &corestat) == -1)
13425084Sjohnlev 		return (xkb_fail(xkb, "cannot stat %s", corefile));
13435084Sjohnlev 
13445084Sjohnlev 	if (flag != O_RDONLY)
13455084Sjohnlev 		return (xkb_fail(xkb, "invalid open flags"));
13465084Sjohnlev 
13475084Sjohnlev 	xkb = mdb_zalloc(sizeof (*xkb), UM_SLEEP);
13485084Sjohnlev 
13497486Sjohn.levon@sun.com 	for (i = 0; i < 4; i++) {
13507486Sjohn.levon@sun.com 		xkb->xkb_pt_map[i].mm_mfn = MFN_INVALID;
13515084Sjohnlev 		xkb->xkb_pt_map[i].mm_map = (char *)MAP_FAILED;
13527486Sjohn.levon@sun.com 	}
13535084Sjohnlev 
13546144Srab 	xkb->xkb_type = XKB_FORMAT_UNKNOWN;
13557486Sjohn.levon@sun.com 	xkb->xkb_map.mm_mfn = MFN_INVALID;
13565084Sjohnlev 	xkb->xkb_map.mm_map = (char *)MAP_FAILED;
13576144Srab 	xkb->xkb_core.xc_p2m_buf = (char *)MAP_FAILED;
13586144Srab 	xkb->xkb_fd = -1;
13595084Sjohnlev 
13605084Sjohnlev 	xkb->xkb_path = strdup(corefile);
13615084Sjohnlev 
13626144Srab 	if ((xkb = xkb_open_elf(xkb)) == NULL)
13636144Srab 		return (NULL);
13645084Sjohnlev 
13656144Srab 	if (xkb->xkb_type == XKB_FORMAT_UNKNOWN) {
13666144Srab 		if (!xkb_open_core(xkb))
13676144Srab 			return (NULL);
13685084Sjohnlev 	}
13695084Sjohnlev 
13706144Srab 	xkb_init_mmu(xkb);
13715084Sjohnlev 
13725084Sjohnlev 	if (!xkb_build_m2p(xkb))
13735084Sjohnlev 		return (NULL);
13745084Sjohnlev 
13757486Sjohn.levon@sun.com 	if (xkb->xkb_is_hvm)
13767486Sjohn.levon@sun.com 		debug_info = DEBUG_INFO_HVM;
13777486Sjohn.levon@sun.com 
13787486Sjohn.levon@sun.com 	if (xkb_read(xkb, debug_info, &xkb->xkb_info,
13795084Sjohnlev 	    sizeof (xkb->xkb_info)) != sizeof (xkb->xkb_info))
13805084Sjohnlev 		return (xkb_fail(xkb, "cannot read debug_info"));
13815084Sjohnlev 
13825084Sjohnlev 	if (xkb->xkb_info.di_magic != DEBUG_INFO_MAGIC) {
13835084Sjohnlev 		return (xkb_fail(xkb, "invalid debug info magic %d",
13845084Sjohnlev 		    xkb->xkb_info.di_magic));
13855084Sjohnlev 	}
13865084Sjohnlev 
13875084Sjohnlev 	if (xkb->xkb_info.di_version != DEBUG_INFO_VERSION) {
13885084Sjohnlev 		return (xkb_fail(xkb, "unknown debug info version %d",
13895084Sjohnlev 		    xkb->xkb_info.di_version));
13905084Sjohnlev 	}
13915084Sjohnlev 
13925084Sjohnlev 	if (!xkb_build_ksyms(xkb))
13935084Sjohnlev 		return (xkb_fail(xkb, "cannot construct namelist"));
13945084Sjohnlev 
13955084Sjohnlev 	return (xkb);
13965084Sjohnlev }
13975084Sjohnlev 
13985084Sjohnlev int
xkb_close(xkb_t * xkb)13995084Sjohnlev xkb_close(xkb_t *xkb)
14005084Sjohnlev {
1401*10175SStuart.Maybee@Sun.COM 	size_t i, sz;
14025084Sjohnlev 
14035084Sjohnlev 	if (xkb == NULL)
14045084Sjohnlev 		return (0);
14055084Sjohnlev 
14065084Sjohnlev 	if (xkb->xkb_m2p != NULL) {
14075084Sjohnlev 		mdb_free(xkb->xkb_m2p,
14085084Sjohnlev 		    (xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t));
14095084Sjohnlev 	}
14105084Sjohnlev 
14115084Sjohnlev 	if (xkb->xkb_pages != NULL) {
14125084Sjohnlev 		(void) munmap((void *)xkb->xkb_pages,
14136144Srab 		    PAGE_SIZE * xkb->xkb_nr_pages);
14145084Sjohnlev 	} else {
14155084Sjohnlev 		for (i = 0; i < 4; i++) {
14165084Sjohnlev 			char *addr = xkb->xkb_pt_map[i].mm_map;
14175084Sjohnlev 			if (addr != (char *)MAP_FAILED)
14185084Sjohnlev 				(void) munmap((void *)addr, PAGE_SIZE);
14195084Sjohnlev 		}
14205084Sjohnlev 		if (xkb->xkb_map.mm_map != (char *)MAP_FAILED) {
14215084Sjohnlev 			(void) munmap((void *)xkb->xkb_map.mm_map,
14225084Sjohnlev 			    PAGE_SIZE);
14235084Sjohnlev 		}
14245084Sjohnlev 	}
14255084Sjohnlev 
14265084Sjohnlev 	if (xkb->xkb_namelist != NULL)
14275084Sjohnlev 		mdb_free(xkb->xkb_namelist, xkb->xkb_namesize);
14285084Sjohnlev 
14296144Srab 	if (xkb->xkb_type == XKB_FORMAT_ELF) {
14306144Srab 		xkb_elf_t *xe = &xkb->xkb_elf;
14316144Srab 
14326144Srab 		if (xe->xe_gelf != NULL)
14336144Srab 			mdb_gelf_destroy(xe->xe_gelf);
14346144Srab 
14356144Srab 		sz = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
14366144Srab 
14376144Srab 		if (xkb->xkb_p2m != NULL)
14386144Srab 			mdb_free(xkb->xkb_p2m, sz);
14396144Srab 
14406144Srab 		sz = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
14416144Srab 
14426144Srab 		if (xe->xe_off != NULL)
14436144Srab 			mdb_free(xe->xe_off, sz);
1444*10175SStuart.Maybee@Sun.COM 
14456144Srab 	} else if (xkb->xkb_type == XKB_FORMAT_CORE) {
14466144Srab 		xkb_core_t *xc = &xkb->xkb_core;
14476144Srab 
14486144Srab 		if (xkb->xkb_fd != -1)
14496144Srab 			(void) close(xkb->xkb_fd);
14506144Srab 
14516144Srab 		sz = (xkb->xkb_nr_pages * sizeof (mfn_t)) + (PAGE_SIZE * 2);
14526144Srab 		sz = PAGE_MASK(sz);
14536144Srab 
14546144Srab 		if (xc->xc_p2m_buf != (xen_pfn_t *)MAP_FAILED)
14556144Srab 			(void) munmap(xc->xc_p2m_buf, sz);
14566144Srab 
1457*10175SStuart.Maybee@Sun.COM 		if (xkb->xkb_vcpu_data != NULL)
1458*10175SStuart.Maybee@Sun.COM 			mdb_free(xkb->xkb_vcpu_data, xkb->xkb_vcpu_data_sz);
1459*10175SStuart.Maybee@Sun.COM 	}
1460*10175SStuart.Maybee@Sun.COM 
1461*10175SStuart.Maybee@Sun.COM 	if (xkb->xkb_vcpus != NULL) {
1462*10175SStuart.Maybee@Sun.COM 		sz = sizeof (struct vcpu_guest_context *) *
1463*10175SStuart.Maybee@Sun.COM 		    xkb->xkb_nr_vcpus;
1464*10175SStuart.Maybee@Sun.COM 		mdb_free(xkb->xkb_vcpus, sz);
14656144Srab 	}
14665084Sjohnlev 
14675084Sjohnlev 	free(xkb->xkb_path);
14685084Sjohnlev 
14695084Sjohnlev 	mdb_free(xkb, sizeof (*xkb));
14705084Sjohnlev 	return (0);
14715084Sjohnlev }
14725084Sjohnlev 
14735084Sjohnlev /*ARGSUSED*/
14745084Sjohnlev static mdb_io_t *
xkb_sym_io(xkb_t * xkb,const char * symfile)14755084Sjohnlev xkb_sym_io(xkb_t *xkb, const char *symfile)
14765084Sjohnlev {
14775084Sjohnlev 	mdb_io_t *io = mdb_memio_create(xkb->xkb_namelist, xkb->xkb_namesize);
14785084Sjohnlev 
14795084Sjohnlev 	if (io == NULL)
14805084Sjohnlev 		mdb_warn("failed to create namelist from %s", xkb->xkb_path);
14815084Sjohnlev 
14825084Sjohnlev 	return (io);
14835084Sjohnlev }
14845084Sjohnlev 
14855084Sjohnlev uint64_t
xkb_vtop(xkb_t * xkb,struct as * as,uintptr_t addr)14865084Sjohnlev xkb_vtop(xkb_t *xkb, struct as *as, uintptr_t addr)
14875084Sjohnlev {
14887486Sjohn.levon@sun.com 	mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
14895084Sjohnlev 	mfn_t mfn;
14905084Sjohnlev 
14915084Sjohnlev 	if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
14925084Sjohnlev 		return (-1ULL);
14935084Sjohnlev 
14945084Sjohnlev 	mfn = xkb_va_to_mfn(xkb, addr, tlmfn);
14955084Sjohnlev 
14965084Sjohnlev 	if (mfn == MFN_INVALID || mfn > xkb->xkb_max_mfn)
14975084Sjohnlev 		return (-1ULL);
14985084Sjohnlev 
14995084Sjohnlev 	return (((uint64_t)xkb->xkb_m2p[mfn] << PAGE_SHIFT)
15005084Sjohnlev 	    | PAGE_OFFSET(addr));
15015084Sjohnlev }
15025084Sjohnlev 
15035084Sjohnlev static int
xkb_getmregs(xkb_t * xkb,uint_t cpu,struct privmregs * mregs)15045084Sjohnlev xkb_getmregs(xkb_t *xkb, uint_t cpu, struct privmregs *mregs)
15055084Sjohnlev {
15065084Sjohnlev 	struct vcpu_guest_context *vcpu;
15075084Sjohnlev 	struct cpu_user_regs *ur;
15085084Sjohnlev 	struct regs *regs;
15095084Sjohnlev 
15106144Srab 	if (cpu >= xkb->xkb_nr_vcpus) {
15115084Sjohnlev 		errno = EINVAL;
15125084Sjohnlev 		return (-1);
15135084Sjohnlev 	}
15145084Sjohnlev 
15155084Sjohnlev 	bzero(mregs, sizeof (*mregs));
15165084Sjohnlev 
1517*10175SStuart.Maybee@Sun.COM 	vcpu = xkb->xkb_vcpus[cpu];
15185084Sjohnlev 	ur = &vcpu->user_regs;
15195084Sjohnlev 	regs = &mregs->pm_gregs;
15205084Sjohnlev 
15215084Sjohnlev 	regs->r_ss = ur->ss;
15225084Sjohnlev 	regs->r_cs = ur->cs;
15235084Sjohnlev 	regs->r_ds = ur->ds;
15245084Sjohnlev 	regs->r_es = ur->es;
15255084Sjohnlev 	regs->r_fs = ur->fs;
15265084Sjohnlev 	regs->r_gs = ur->gs;
15275084Sjohnlev 	regs->r_trapno = ur->entry_vector;
15285084Sjohnlev 	regs->r_err = ur->error_code;
15295084Sjohnlev #ifdef __amd64
15305084Sjohnlev 	regs->r_savfp = ur->rbp;
15315084Sjohnlev 	regs->r_savpc = ur->rip;
15325084Sjohnlev 	regs->r_rdi = ur->rdi;
15335084Sjohnlev 	regs->r_rsi = ur->rsi;
15345084Sjohnlev 	regs->r_rdx = ur->rdx;
15355084Sjohnlev 	regs->r_rcx = ur->rcx;
15365084Sjohnlev 	regs->r_r8 = ur->r8;
15375084Sjohnlev 	regs->r_r9 = ur->r9;
15385084Sjohnlev 	regs->r_rax = ur->rax;
15395084Sjohnlev 	regs->r_rbx = ur->rbx;
15405084Sjohnlev 	regs->r_rbp = ur->rbp;
15415084Sjohnlev 	regs->r_r10 = ur->r10;
15425084Sjohnlev 	regs->r_r11 = ur->r11;
15435084Sjohnlev 	regs->r_r12 = ur->r12;
15445084Sjohnlev 	regs->r_r13 = ur->r13;
15455084Sjohnlev 	regs->r_r14 = ur->r14;
15465084Sjohnlev 	regs->r_r15 = ur->r15;
15475084Sjohnlev 	regs->r_rip = ur->rip;
15485084Sjohnlev 	regs->r_rfl = ur->rflags;
15495084Sjohnlev 	regs->r_rsp = ur->rsp;
15505084Sjohnlev #else
15515084Sjohnlev 	regs->r_savfp = ur->ebp;
15525084Sjohnlev 	regs->r_savpc = ur->eip;
15535084Sjohnlev 	regs->r_edi = ur->edi;
15545084Sjohnlev 	regs->r_esi = ur->esi;
15555084Sjohnlev 	regs->r_ebp = ur->ebp;
15565084Sjohnlev 	regs->r_esp = ur->esp;
15575084Sjohnlev 	regs->r_ebx = ur->ebx;
15585084Sjohnlev 	regs->r_edx = ur->edx;
15595084Sjohnlev 	regs->r_ecx = ur->ecx;
15605084Sjohnlev 	regs->r_eax = ur->eax;
15615084Sjohnlev 	regs->r_eip = ur->eip;
15625084Sjohnlev 	regs->r_efl = ur->eflags;
15635084Sjohnlev 	regs->r_uesp = 0;
15645084Sjohnlev #endif
15655084Sjohnlev 
15665084Sjohnlev 	bcopy(&vcpu->ctrlreg, &mregs->pm_cr, 8 * sizeof (ulong_t));
15675084Sjohnlev 	bcopy(&vcpu->debugreg, &mregs->pm_dr, 8 * sizeof (ulong_t));
15685084Sjohnlev 
15695084Sjohnlev 	mregs->pm_flags = PM_GREGS | PM_CRREGS | PM_DRREGS;
15705084Sjohnlev 
15715084Sjohnlev 	return (0);
15725084Sjohnlev }
15735084Sjohnlev 
15745084Sjohnlev static mdb_kb_ops_t xpv_kb_ops = {
15755084Sjohnlev 	.kb_open = (void *(*)())xkb_open,
15765084Sjohnlev 	.kb_close = (int (*)())xkb_close,
15775084Sjohnlev 	.kb_sym_io = (mdb_io_t *(*)())xkb_sym_io,
15785084Sjohnlev 	.kb_kread = (ssize_t (*)())xkb_read,
15795084Sjohnlev 	.kb_kwrite = (ssize_t (*)())mdb_tgt_notsup,
15805084Sjohnlev 	.kb_aread = (ssize_t (*)())xkb_aread,
15815084Sjohnlev 	.kb_awrite = (ssize_t (*)())mdb_tgt_notsup,
15825084Sjohnlev 	.kb_pread = (ssize_t (*)())xkb_pread,
15835084Sjohnlev 	.kb_pwrite = (ssize_t (*)())mdb_tgt_notsup,
15845084Sjohnlev 	.kb_vtop = (uint64_t (*)())xkb_vtop,
15855084Sjohnlev 	.kb_getmregs = (int (*)())xkb_getmregs
15865084Sjohnlev };
15875084Sjohnlev 
15885084Sjohnlev mdb_kb_ops_t *
mdb_kb_ops(void)15895084Sjohnlev mdb_kb_ops(void)
15905084Sjohnlev {
15915084Sjohnlev 	return (&xpv_kb_ops);
15925084Sjohnlev }
15935084Sjohnlev 
15945084Sjohnlev static const mdb_dcmd_t dcmds[] = { NULL, };
15955084Sjohnlev static const mdb_walker_t walkers[] = { NULL, };
15965084Sjohnlev static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
15975084Sjohnlev 
15985084Sjohnlev const mdb_modinfo_t *
_mdb_init(void)15995084Sjohnlev _mdb_init(void)
16005084Sjohnlev {
16015084Sjohnlev 	return (&modinfo);
16025084Sjohnlev }
16035084Sjohnlev 
/*
 * Module unload entry point.  Nothing to tear down.
 */
void
_mdb_fini(void)
{
	/* intentionally empty */
}
1608