Home | History | Annotate | Download | only in mdb_kb
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * KVM backend for hypervisor domain dumps.  We don't use libkvm for
     28  * such dumps, since they do not have a namelist file or the typical
     29  * dump structures we expect to aid bootstrapping.  Instead, we
     30  * bootstrap based upon a debug_info structure at a known VA, using the
     31  * guest's own page tables to resolve to physical addresses, and
     32  * construct the namelist in a manner similar to ksyms_snapshot().
     33  *
     34  * Note that there are two formats understood by this module: the older,
     35  * ad hoc format, which we call 'core' within this file, and an
     36  * ELF-based format, known as 'elf'.
     37  *
     38  * We only support the older format generated on Solaris dom0: before we
     39  * fixed it, core dump files were broken whenever a PFN didn't map a
     40  * real MFN (!).
     41  */
     42 
     43 #include <strings.h>
     44 #include <stdio.h>
     45 #include <stdlib.h>
     46 #include <stddef.h>
     47 #include <stdarg.h>
     48 #include <unistd.h>
     49 #include <fcntl.h>
     50 #include <gelf.h>
     51 #include <errno.h>
     52 
     53 #include <sys/mman.h>
     54 #include <sys/stat.h>
     55 #include <sys/debug_info.h>
     56 #include <sys/xen_mmu.h>
     57 #include <sys/elf.h>
     58 #include <sys/machelf.h>
     59 #include <sys/modctl.h>
     60 #include <sys/kobj.h>
     61 #include <sys/kobj_impl.h>
     62 #include <sys/sysmacros.h>
     63 #include <sys/privmregs.h>
     64 #include <vm/as.h>
     65 
     66 #include <mdb/mdb_io.h>
     67 #include <mdb/mdb_kb.h>
     68 #include <mdb/mdb_target_impl.h>
     69 
     70 #include <xen/public/xen.h>
     71 #include <xen/public/version.h>
     72 #include <xen/public/elfnote.h>
     73 
/*
 * Indices of the section headers in the fabricated namelist (see
 * xkb_namelist_t and xkb_build_ksyms()).
 */
#define	XKB_SHDR_NULL 0
#define	XKB_SHDR_SYMTAB 1
#define	XKB_SHDR_STRTAB 2
#define	XKB_SHDR_SHSTRTAB 3
#define	XKB_SHDR_NUM 4

/*
 * Selectors for xkb_walk_syms().  They are used both as a bitmask (its
 * 'types' argument) and as indices into its 'sizes' array, which is why
 * that array is declared with XKB_WALK_STR + 1 elements.
 */
#define	XKB_WALK_LOCAL 0x1
#define	XKB_WALK_GLOBAL 0x2
#define	XKB_WALK_STR 0x4
#define	XKB_WALK_ALL (XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR)

/*
 * The known virtual address of the guest's debug_info structure, from
 * which we bootstrap.  NOTE(review): the _HVM variant is presumably the
 * address used by HVM guests; its use is not visible in this part of
 * the file.
 */
#if defined(__i386)
#define	DEBUG_INFO 0xf4bff000
#define	DEBUG_INFO_HVM 0xfe7ff000
#elif defined(__amd64)
#define	DEBUG_INFO 0xfffffffffb7ff000
#define	DEBUG_INFO_HVM 0xfffffffffb7ff000
#endif
     92 
/*
 * Page geometry and the page table entry bits needed to walk the
 * guest's page tables by hand (see xkb_va_to_mfn()).
 */
#define	PAGE_SIZE 0x1000
#define	PAGE_SHIFT 12
/* Offset of an address within its page. */
#define	PAGE_OFFSET(a) ((a) & (PAGE_SIZE - 1))
/* Round an address or size down to a page boundary. */
#define	PAGE_MASK(a) ((a) & ~(PAGE_SIZE - 1))
#define	PAGE_ALIGNED(a) (((a) & (PAGE_SIZE -1)) == 0)
/* Frame address bits of a PTE, for large-page and ordinary entries. */
#define	PT_PADDR_LGPG 0x000fffffffffe000ull
#define	PT_PADDR 0x000ffffffffff000ull
#define	PT_VALID 0x1
#define	PT_PAGESIZE 0x080
/* A large-page PTE can only occur above the lowest level (level 0). */
#define	PTE_IS_LGPG(p, l) ((l) > 0 && ((p) & PT_PAGESIZE))

/* Magic numbers at the start of an old-style ('core') dump file. */
#define	XC_CORE_MAGIC 0xF00FEBED
#define	XC_CORE_MAGIC_HVM 0xF00FEBEE

/* NOTE(review): presumably a vcpu context flag; unused in the code seen here */
#define	VGCF_HVM_GUEST (1<<1)
    108 
/*
 * Header of the old-style ('core') dump format.  It is read directly
 * from offset 0 of the dump file.
 */
typedef struct xc_core_header {
	unsigned int xch_magic;		/* XC_CORE_MAGIC{,_HVM} */
	unsigned int xch_nr_vcpus;	/* number of vcpu contexts */
	unsigned int xch_nr_pages;	/* number of pages in the dump */
	unsigned int xch_ctxt_offset;	/* file offset of vcpu contexts */
	unsigned int xch_index_offset;	/* file offset of the p2m table */
	unsigned int xch_pages_offset;	/* file offset of the page data */
} xc_core_header_t;
    117 
/*
 * Header information of an ELF-format dump (kept in xkb_elf_t.xe_hdr).
 * NOTE(review): presumably read from one of the .note.Xen notes; the
 * code that fills it in is not in the part of the file seen here.
 */
struct xc_elf_header {
	uint64_t xeh_magic;
	uint64_t xeh_nr_vcpus;
	uint64_t xeh_nr_pages;
	uint64_t xeh_page_size;
};
    124 
/*
 * Descriptor of the XEN_ELFNOTE_DUMPCORE_XEN_VERSION note, as
 * interpreted by xkb_identify().  As noted there, the 32-bit and 64-bit
 * layouts of this structure only differ past xev_capabilities.
 */
struct xc_elf_version {
	uint64_t xev_major;
	uint64_t xev_minor;
	xen_extraversion_t xev_extra;
	xen_compile_info_t xev_compile_info;
	/* searched for "x86_64" / "x86_32" to derive the word size */
	xen_capabilities_info_t xev_capabilities;
	xen_changeset_info_t xev_changeset;
	xen_platform_parameters_t xev_platform_parameters;
	uint64_t xev_pagesize;
};
    135 
/*
 * Either an old-style (3.0.4) core format, or the ELF format.
 */
typedef enum {
	XKB_FORMAT_UNKNOWN = 0,		/* format not (yet) determined */
	XKB_FORMAT_CORE = 1,		/* old ad hoc 'core' format */
	XKB_FORMAT_ELF = 2		/* ELF-based format */
} xkb_type_t;
    144 
/*
 * A one-entry cache of a mapped machine frame: xkb_map_mfn() hands back
 * mm_map directly when it is asked for mm_mfn again.
 */
typedef struct mfn_map {
	mfn_t mm_mfn;		/* the frame currently cached */
	char *mm_map;		/* address of that frame's contents */
} mfn_map_t;
    149 
/*
 * Geometry of the guest's page tables, as used by xkb_va_to_mfn().
 */
typedef struct mmu_info {
	size_t mi_max;		/* index of the top page table level */
	size_t mi_shift[4];	/* VA shift for each level's table index */
	size_t mi_ptes;		/* entries per table (a power of two) */
	size_t mi_ptesize;	/* bytes per entry; 8, otherwise taken as 4 */
} mmu_info_t;
    156 
/*
 * State specific to XKB_FORMAT_CORE dumps.
 */
typedef struct xkb_core {
	xc_core_header_t xc_hdr;	/* header read from offset 0 */
	void *xc_p2m_buf;		/* mmap()ed p2m; see xkb_map_p2m() */
} xkb_core_t;
    161 
/*
 * State specific to XKB_FORMAT_ELF dumps.
 */
typedef struct xkb_elf {
	mdb_gelf_file_t *xe_gelf;	/* the dump, opened as an ELF file */
	/*
	 * Indexed by pfn: the index of that page within the dump's page
	 * data, or (size_t)-1 if the pfn is not present in the dump.
	 */
	size_t *xe_off;
	struct xc_elf_header xe_hdr;
	struct xc_elf_version xe_version;
} xkb_elf_t;
    168 
/*
 * Everything we know about one open dump file.
 */
typedef struct xkb {
	char *xkb_path;			/* path of the dump file */
	int xkb_fd;			/* open descriptor for xkb_path */
	int xkb_is_hvm;			/* non-zero for an HVM guest */

	xkb_type_t xkb_type;		/* which of the two formats */
	xkb_core_t xkb_core;		/* XKB_FORMAT_CORE state */
	xkb_elf_t xkb_elf;		/* XKB_FORMAT_ELF state */

	size_t xkb_nr_vcpus;
	size_t xkb_nr_pages;		/* number of pages in the dump */
	size_t xkb_pages_off;		/* file offset of the page data */
	xen_pfn_t xkb_max_pfn;		/* highest valid index of xkb_p2m */
	mfn_t xkb_max_mfn;		/* highest valid index of xkb_m2p */
	int xkb_is_pae;			/* NOTE(review): presumably PAE guest */

	mmu_info_t xkb_mmu;		/* page table geometry */
	debug_info_t xkb_info;		/* the guest's debug_info */

	void *xkb_vcpu_data;
	size_t xkb_vcpu_data_sz;
	struct vcpu_guest_context **xkb_vcpus;	/* [0] supplies cr3 */

	/*
	 * If xkb_pages is NULL we are "windowed": pages are read or
	 * mapped from the file one at a time rather than being
	 * addressed relative to xkb_pages.
	 */
	char *xkb_pages;
	mfn_t *xkb_p2m;			/* pfn -> mfn */
	xen_pfn_t *xkb_m2p;		/* mfn -> pfn */
	mfn_map_t xkb_pt_map[4];	/* cached page table, per level */
	mfn_map_t xkb_map;		/* cached data page */

	char *xkb_namelist;		/* fabricated ELF namelist */
	size_t xkb_namesize;		/* size of xkb_namelist in bytes */
} xkb_t;
    201 
/*
 * Section header string table of the fabricated namelist.  The sh_name
 * values used in xkb_build_ksyms() are byte offsets into this string.
 */
static const char xkb_shstrtab[] = "\0.symtab\0.strtab\0.shstrtab\0";

/*
 * Fixed-size leading part of the fabricated namelist.  In the buffer
 * built by xkb_build_ksyms() it is followed by the symbol table and
 * then the string table.
 */
typedef struct xkb_namelist {
	Ehdr	kh_elf_hdr;
	Phdr	kh_text_phdr;
	Phdr	kh_data_phdr;
	Shdr	kh_shdr[XKB_SHDR_NUM];
	char	shstrings[sizeof (xkb_shstrtab)];
} xkb_namelist_t;
    211 
/*
 * Forward declarations of functions that are used before they are
 * defined (or, for xkb_close(), defined later in the file).
 */
static int xkb_build_ksyms(xkb_t *);
static offset_t xkb_mfn_to_offset(xkb_t *, mfn_t);
static mfn_t xkb_va_to_mfn(xkb_t *, uintptr_t, mfn_t);
static ssize_t xkb_read(xkb_t *, uintptr_t, void *, size_t);
static int xkb_read_word(xkb_t *, uintptr_t, uintptr_t *);
static char *xkb_map_mfn(xkb_t *, mfn_t, mfn_map_t *);
static int xkb_close(xkb_t *);
    219 
    220 /*
    221  * Jump through the hoops we need to to correctly identify a core file
    222  * of either the old or new format.
    223  */
    224 int
    225 xkb_identify(const char *file, int *longmode)
    226 {
    227 	xc_core_header_t header;
    228 	mdb_gelf_file_t *gf = NULL;
    229 	mdb_gelf_sect_t *sect = NULL;
    230 	mdb_io_t *io = NULL;
    231 	char *notes = NULL;
    232 	char *pos;
    233 	int ret = 0;
    234 	size_t sz;
    235 	int fd;
    236 
    237 	if ((fd = open64(file, O_RDONLY)) == -1)
    238 		return (-1);
    239 
    240 	if (pread64(fd, &header, sizeof (header), 0) != sizeof (header)) {
    241 		(void) close(fd);
    242 		return (0);
    243 	}
    244 
    245 	(void) close(fd);
    246 
    247 	if (header.xch_magic == XC_CORE_MAGIC) {
    248 		*longmode = 0;
    249 
    250 		/*
    251 		 * Indeed.
    252 		 */
    253 		sz = header.xch_index_offset - header.xch_ctxt_offset;
    254 #ifdef _LP64
    255 		if (sizeof (struct vcpu_guest_context) *
    256 		    header.xch_nr_vcpus == sz)
    257 			*longmode = 1;
    258 #else
    259 		if (sizeof (struct vcpu_guest_context) *
    260 		    header.xch_nr_vcpus != sz)
    261 			*longmode = 1;
    262 #endif /* _LP64 */
    263 
    264 		return (1);
    265 	}
    266 
    267 	if ((io = mdb_fdio_create_path(NULL, file, O_RDONLY, 0)) == NULL)
    268 		return (-1);
    269 
    270 	if ((gf = mdb_gelf_create(io, ET_NONE, GF_FILE)) == NULL)
    271 		goto out;
    272 
    273 	if ((sect = mdb_gelf_sect_by_name(gf, ".note.Xen")) == NULL)
    274 		goto out;
    275 
    276 	if ((notes = mdb_gelf_sect_load(gf, sect)) == NULL)
    277 		goto out;
    278 
    279 	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
    280 		struct xc_elf_version *vers;
    281 		/* LINTED - alignment */
    282 		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
    283 		char *desc;
    284 		char *name;
    285 
    286 		name = pos + sizeof (*nhdr);
    287 		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);
    288 
    289 		pos = desc + nhdr->n_descsz;
    290 
    291 		if (nhdr->n_type != XEN_ELFNOTE_DUMPCORE_XEN_VERSION)
    292 			continue;
    293 
    294 		/*
    295 		 * The contents of this struct differ between 32 and 64
    296 		 * bit; however, not until past the 'xev_capabilities'
    297 		 * member, so we can just about get away with this.
    298 		 */
    299 
    300 		/* LINTED - alignment */
    301 		vers = (struct xc_elf_version *)desc;
    302 
    303 		if (strstr(vers->xev_capabilities, "x86_64")) {
    304 			/*
    305 			 * 64-bit hypervisor, but it can still be
    306 			 * a 32-bit domain core. 32-bit domain cores
    307 			 * are also dumped in Elf64 format, but they
    308 			 * have e_machine set to EM_386, not EM_AMD64.
    309 			 */
    310 			if (gf->gf_ehdr.e_machine == EM_386)
    311 				*longmode = 0;
    312 			else
    313 				*longmode = 1;
    314 		} else if (strstr(vers->xev_capabilities, "x86_32") ||
    315 		    strstr(vers->xev_capabilities, "x86_32p")) {
    316 			/*
    317 			 * 32-bit hypervisor, can only be a 32-bit core.
    318 			 */
    319 			*longmode = 0;
    320 		} else {
    321 			mdb_warn("couldn't derive word size of dump; "
    322 			    "assuming 64-bit");
    323 			*longmode = 1;
    324 		}
    325 	}
    326 
    327 	ret = 1;
    328 
    329 out:
    330 	if (gf != NULL)
    331 		mdb_gelf_destroy(gf);
    332 	else if (io != NULL)
    333 		mdb_io_destroy(io);
    334 	return (ret);
    335 }
    336 
    337 static void *
    338 xkb_fail(xkb_t *xkb, const char *msg, ...)
    339 {
    340 	va_list args;
    341 
    342 	va_start(args, msg);
    343 	if (xkb != NULL)
    344 		(void) fprintf(stderr, "%s: ", xkb->xkb_path);
    345 	(void) vfprintf(stderr, msg, args);
    346 	(void) fprintf(stderr, "\n");
    347 	va_end(args);
    348 	if (xkb != NULL)
    349 		(void) xkb_close(xkb);
    350 
    351 	errno = ENOEXEC;
    352 
    353 	return (NULL);
    354 }
    355 
    356 static int
    357 xkb_build_m2p(xkb_t *xkb)
    358 {
    359 	size_t i;
    360 
    361 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
    362 		if (xkb->xkb_p2m[i] != MFN_INVALID &&
    363 		    xkb->xkb_p2m[i] > xkb->xkb_max_mfn)
    364 			xkb->xkb_max_mfn = xkb->xkb_p2m[i];
    365 	}
    366 
    367 	xkb->xkb_m2p = mdb_alloc((xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t),
    368 	    UM_SLEEP);
    369 
    370 	for (i = 0; i <= xkb->xkb_max_mfn; i++)
    371 		xkb->xkb_m2p[i] = PFN_INVALID;
    372 
    373 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
    374 		if (xkb->xkb_p2m[i] != MFN_INVALID)
    375 			xkb->xkb_m2p[xkb->xkb_p2m[i]] = i;
    376 	}
    377 
    378 	return (1);
    379 }
    380 
    381 /*
    382  * With FORMAT_CORE, we can use the table in the dump file directly.
    383  * Just to make things fun, they've not page-aligned the p2m table.
    384  */
    385 static int
    386 xkb_map_p2m(xkb_t *xkb)
    387 {
    388 	offset_t off;
    389 	size_t size;
    390 	xkb_core_t *xc = &xkb->xkb_core;
    391 	size_t count = xkb->xkb_nr_pages;
    392 	size_t boff = xc->xc_hdr.xch_index_offset;
    393 
    394 	size = (sizeof (mfn_t) * count) + (PAGE_SIZE * 2);
    395 	size = PAGE_MASK(size);
    396 	off = PAGE_MASK(boff);
    397 
    398 	/* LINTED - alignment */
    399 	xc->xc_p2m_buf = (mfn_t *)mmap(NULL, size, PROT_READ,
    400 	    MAP_SHARED, xkb->xkb_fd, off);
    401 
    402 	if (xc->xc_p2m_buf == (xen_pfn_t *)MAP_FAILED) {
    403 		(void) xkb_fail(xkb, "cannot map p2m table");
    404 		return (0);
    405 	}
    406 
    407 	/* LINTED - alignment */
    408 	xkb->xkb_p2m = (mfn_t *)((char *)xc->xc_p2m_buf +
    409 	    PAGE_OFFSET(boff));
    410 
    411 	return (1);
    412 }
    413 
    414 /*
    415  * With FORMAT_ELF, we have a set of <pfn,mfn> pairs, which we convert
    416  * into a linear array indexed by pfn for convenience.  We also need to
    417  * track the mapping between mfn and the offset in the file: a pfn with
    418  * no mfn will not appear in the core file.
    419  */
    420 static int
    421 xkb_build_p2m(xkb_t *xkb)
    422 {
    423 	xkb_elf_t *xe = &xkb->xkb_elf;
    424 	mdb_gelf_sect_t *sect;
    425 	size_t size;
    426 	size_t i;
    427 
    428 	struct elf_p2m {
    429 		uint64_t pfn;
    430 		uint64_t gmfn;
    431 	} *p2m;
    432 
    433 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_p2m");
    434 
    435 	if (sect == NULL) {
    436 		(void) xkb_fail(xkb, "cannot find section .xen_p2m");
    437 		return (0);
    438 	}
    439 
    440 	if ((p2m = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
    441 		(void) xkb_fail(xkb, "couldn't read .xen_p2m");
    442 		return (0);
    443 	}
    444 
    445 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
    446 		if (p2m[i].pfn > xkb->xkb_max_pfn)
    447 			xkb->xkb_max_pfn = p2m[i].pfn;
    448 	}
    449 
    450 	size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
    451 	xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
    452 	size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
    453 	xe->xe_off = mdb_alloc(size, UM_SLEEP);
    454 
    455 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
    456 		xkb->xkb_p2m[i] = PFN_INVALID;
    457 		xe->xe_off[i] = (size_t)-1;
    458 	}
    459 
    460 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
    461 		xkb->xkb_p2m[p2m[i].pfn] = p2m[i].gmfn;
    462 		xe->xe_off[p2m[i].pfn] = i;
    463 	}
    464 
    465 	return (1);
    466 }
    467 
    468 /*
    469  * For HVM images, we don't have the corresponding MFN list; the table
    470  * is just a mapping from page index in the dump to the corresponding
    471  * PFN.  To simplify the other code, we'll pretend that these PFNs are
    472  * really MFNs as well, by populating xkb_p2m.
    473  */
    474 static int
    475 xkb_build_fake_p2m(xkb_t *xkb)
    476 {
    477 	xkb_elf_t *xe = &xkb->xkb_elf;
    478 	mdb_gelf_sect_t *sect;
    479 	size_t size;
    480 	size_t i;
    481 
    482 	uint64_t *p2pfn;
    483 
    484 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pfn");
    485 
    486 	if (sect == NULL) {
    487 		(void) xkb_fail(xkb, "cannot find section .xen_pfn");
    488 		return (0);
    489 	}
    490 
    491 	if ((p2pfn = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
    492 		(void) xkb_fail(xkb, "couldn't read .xen_pfn");
    493 		return (0);
    494 	}
    495 
    496 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
    497 		if (p2pfn[i] != PFN_INVALID && p2pfn[i] > xkb->xkb_max_pfn)
    498 			xkb->xkb_max_pfn = p2pfn[i];
    499 	}
    500 
    501 	size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
    502 	xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
    503 
    504 	size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
    505 	xe->xe_off = mdb_alloc(size, UM_SLEEP);
    506 
    507 	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
    508 		xkb->xkb_p2m[i] = PFN_INVALID;
    509 		xe->xe_off[i] = (size_t)-1;
    510 	}
    511 
    512 	for (i = 0; i < xkb->xkb_nr_pages; i++) {
    513 		if (p2pfn[i] == PFN_INVALID)
    514 			continue;
    515 		xkb->xkb_p2m[p2pfn[i]] = p2pfn[i];
    516 		xe->xe_off[p2pfn[i]] = i;
    517 	}
    518 
    519 	return (1);
    520 }
    521 
    522 /*
    523  * Return the MFN of the top-level page table for the given as.
    524  */
    525 static mfn_t
    526 xkb_as_to_mfn(xkb_t *xkb, struct as *as)
    527 {
    528 	uintptr_t asp = (uintptr_t)as;
    529 	uintptr_t hatp;
    530 	uintptr_t htablep;
    531 	uintptr_t pfn;
    532 
    533 	if (!xkb_read_word(xkb, asp + offsetof(struct as, a_hat), &hatp))
    534 		return (MFN_INVALID);
    535 	if (!xkb_read_word(xkb, hatp + xkb->xkb_info.di_hat_htable_off,
    536 	    &htablep))
    537 		return (MFN_INVALID);
    538 	if (!xkb_read_word(xkb, htablep + xkb->xkb_info.di_ht_pfn_off,
    539 	    &pfn))
    540 		return (MFN_INVALID);
    541 
    542 	if (pfn > xkb->xkb_max_pfn)
    543 		return (MFN_INVALID);
    544 
    545 	return (xkb->xkb_p2m[pfn]);
    546 }
    547 
    548 static mfn_t
    549 xkb_cr3_to_pfn(xkb_t *xkb)
    550 {
    551 	uint64_t cr3 = xkb->xkb_vcpus[0]->ctrlreg[3];
    552 	if (xkb->xkb_is_hvm)
    553 		return (cr3 >> PAGE_SHIFT);
    554 	return (xen_cr3_to_pfn(cr3));
    555 }
    556 
    557 static ssize_t
    558 xkb_read_helper(xkb_t *xkb, struct as *as, int phys, uint64_t addr,
    559     void *buf, size_t size)
    560 {
    561 	size_t left = size;
    562 	int windowed = (xkb->xkb_pages == NULL);
    563 	mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
    564 
    565 	if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
    566 		return (-1);
    567 
    568 	while (left) {
    569 		uint64_t pos = addr + (size - left);
    570 		char *outpos = (char *)buf + (size - left);
    571 		size_t pageoff = PAGE_OFFSET(pos);
    572 		size_t sz = MIN(left, PAGE_SIZE - pageoff);
    573 		mfn_t mfn;
    574 
    575 		if (!phys) {
    576 			mfn = xkb_va_to_mfn(xkb, pos, tlmfn);
    577 			if (mfn == MFN_INVALID)
    578 				return (-1);
    579 		} else {
    580 			xen_pfn_t pfn = pos >> PAGE_SHIFT;
    581 			if (pfn > xkb->xkb_max_pfn)
    582 				return (-1);
    583 			mfn = xkb->xkb_p2m[pfn];
    584 			if (mfn == MFN_INVALID)
    585 				return (-1);
    586 		}
    587 
    588 		/*
    589 		 * If we're windowed then pread() is much faster.
    590 		 */
    591 		if (windowed) {
    592 			offset_t off = xkb_mfn_to_offset(xkb, mfn);
    593 			int ret;
    594 
    595 			if (off == ~1ULL)
    596 				return (-1);
    597 
    598 			off += pageoff;
    599 
    600 			ret = pread64(xkb->xkb_fd, outpos, sz, off);
    601 			if (ret == -1)
    602 				return (-1);
    603 			if (ret != sz)
    604 				return ((size - left) + ret);
    605 
    606 			left -= ret;
    607 		} else {
    608 			if (xkb_map_mfn(xkb, mfn, &xkb->xkb_map) == NULL)
    609 				return (-1);
    610 
    611 			bcopy(xkb->xkb_map.mm_map + pageoff, outpos, sz);
    612 
    613 			left -= sz;
    614 		}
    615 	}
    616 
    617 	return (size);
    618 }
    619 
    620 static ssize_t
    621 xkb_pread(xkb_t *xkb, uint64_t addr, void *buf, size_t size)
    622 {
    623 	return (xkb_read_helper(xkb, NULL, 1, addr, buf, size));
    624 }
    625 
    626 static ssize_t
    627 xkb_aread(xkb_t *xkb, uintptr_t addr, void *buf, size_t size, struct as *as)
    628 {
    629 	return (xkb_read_helper(xkb, as, 0, addr, buf, size));
    630 }
    631 
    632 static ssize_t
    633 xkb_read(xkb_t *xkb, uintptr_t addr, void *buf, size_t size)
    634 {
    635 	return (xkb_aread(xkb, addr, buf, size, NULL));
    636 }
    637 
    638 static int
    639 xkb_read_word(xkb_t *xkb, uintptr_t addr, uintptr_t *buf)
    640 {
    641 	if (xkb_read(xkb, addr, buf, sizeof (uintptr_t)) !=
    642 	    sizeof (uintptr_t))
    643 		return (0);
    644 	return (1);
    645 }
    646 
    647 static char *
    648 xkb_readstr(xkb_t *xkb, uintptr_t addr)
    649 {
    650 	char *str = mdb_alloc(1024, UM_SLEEP);
    651 	size_t i;
    652 
    653 	for (i = 0; i < 1024; i++) {
    654 		if (xkb_read(xkb, addr + i, &str[i], 1) != 1) {
    655 			mdb_free(str, 1024);
    656 			return (NULL);
    657 		}
    658 
    659 		if (str[i] == '\0')
    660 			break;
    661 	}
    662 
    663 	if (i == 1024) {
    664 		mdb_free(str, 1024);
    665 		return (NULL);
    666 	}
    667 
    668 	return (str);
    669 }
    670 
    671 static offset_t
    672 xkb_pfn_to_off(xkb_t *xkb, xen_pfn_t pfn)
    673 {
    674 	if (pfn == PFN_INVALID || pfn > xkb->xkb_max_pfn)
    675 		return (-1ULL);
    676 
    677 	if (xkb->xkb_type == XKB_FORMAT_CORE)
    678 		return (PAGE_SIZE * pfn);
    679 
    680 	return (PAGE_SIZE * (xkb->xkb_elf.xe_off[pfn]));
    681 }
    682 
    683 static offset_t
    684 xkb_mfn_to_offset(xkb_t *xkb, mfn_t mfn)
    685 {
    686 	xen_pfn_t pfn;
    687 
    688 	if (mfn > xkb->xkb_max_mfn)
    689 		return (-1ULL);
    690 
    691 	pfn = xkb->xkb_m2p[mfn];
    692 
    693 	if (pfn == PFN_INVALID)
    694 		return (-1ULL);
    695 
    696 	return (xkb->xkb_pages_off + xkb_pfn_to_off(xkb, pfn));
    697 }
    698 
    699 static char *
    700 xkb_map_mfn(xkb_t *xkb, mfn_t mfn, mfn_map_t *mm)
    701 {
    702 	int windowed = (xkb->xkb_pages == NULL);
    703 	offset_t off;
    704 
    705 	if (mm->mm_mfn == mfn)
    706 		return (mm->mm_map);
    707 
    708 	mm->mm_mfn = mfn;
    709 
    710 	if (windowed) {
    711 		if (mm->mm_map != (char *)MAP_FAILED) {
    712 			(void) munmap(mm->mm_map, PAGE_SIZE);
    713 			mm->mm_map = (void *)MAP_FAILED;
    714 		}
    715 
    716 		if ((off = xkb_mfn_to_offset(xkb, mfn)) == (-1ULL))
    717 			return (NULL);
    718 
    719 		mm->mm_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED,
    720 		    xkb->xkb_fd, off);
    721 
    722 		if (mm->mm_map == (char *)MAP_FAILED)
    723 			return (NULL);
    724 	} else {
    725 		xen_pfn_t pfn;
    726 
    727 		mm->mm_map = NULL;
    728 
    729 		if (mfn > xkb->xkb_max_mfn)
    730 			return (NULL);
    731 
    732 		pfn = xkb->xkb_m2p[mfn];
    733 
    734 		if (pfn == PFN_INVALID)
    735 			return (NULL);
    736 
    737 		mm->mm_map = xkb->xkb_pages + xkb_pfn_to_off(xkb, pfn);
    738 	}
    739 
    740 	return (mm->mm_map);
    741 }
    742 
    743 static uint64_t
    744 xkb_get_pte(mmu_info_t *mmu, char *ptep)
    745 {
    746 	uint64_t pte = 0;
    747 
    748 	if (mmu->mi_ptesize == 8) {
    749 		/* LINTED - alignment */
    750 		pte = *((uint64_t *)ptep);
    751 	} else {
    752 		/* LINTED - alignment */
    753 		pte = *((uint32_t *)ptep);
    754 	}
    755 
    756 	return (pte);
    757 }
    758 
    759 static mfn_t
    760 xkb_pte_to_base_mfn(uint64_t pte, size_t level)
    761 {
    762 	if (PTE_IS_LGPG(pte, level)) {
    763 		pte &= PT_PADDR_LGPG;
    764 	} else {
    765 		pte &= PT_PADDR;
    766 	}
    767 
    768 	return (pte >> PAGE_SHIFT);
    769 }
    770 
    771 /*
    772  * Resolve the given VA into an MFN, using the provided mfn as a top-level page
    773  * table.
    774  */
    775 static mfn_t
    776 xkb_va_to_mfn(xkb_t *xkb, uintptr_t va, mfn_t mfn)
    777 {
    778 	mmu_info_t *mmu = &xkb->xkb_mmu;
    779 	uint64_t pte;
    780 	size_t level;
    781 
    782 	for (level = mmu->mi_max; ; --level) {
    783 		size_t entry;
    784 
    785 		if (xkb_map_mfn(xkb, mfn, &xkb->xkb_pt_map[level]) == NULL)
    786 			return (MFN_INVALID);
    787 
    788 		entry = (va >> mmu->mi_shift[level]) & (mmu->mi_ptes - 1);
    789 
    790 		pte = xkb_get_pte(mmu, (char *)xkb->xkb_pt_map[level].mm_map +
    791 		    entry * mmu->mi_ptesize);
    792 
    793 		if ((mfn = xkb_pte_to_base_mfn(pte, level)) == MFN_INVALID)
    794 			return (MFN_INVALID);
    795 
    796 		if (level == 0)
    797 			break;
    798 
    799 		/*
    800 		 * Currently 'mfn' refers to the base MFN of the
    801 		 * large-page mapping.  Add on the 4K-sized index into
    802 		 * the large-page mapping to get the right MFN within
    803 		 * the mapping.
    804 		 */
    805 		if (PTE_IS_LGPG(pte, level)) {
    806 			mfn += (va & ((1 << mmu->mi_shift[level]) - 1)) >>
    807 			    PAGE_SHIFT;
    808 			break;
    809 		}
    810 	}
    811 
    812 	return (mfn);
    813 }
    814 
    815 static int
    816 xkb_read_module(xkb_t *xkb, uintptr_t modulep, struct module *module,
    817     uintptr_t *sym_addr, uintptr_t *sym_count, uintptr_t *str_addr)
    818 {
    819 	if (xkb_read(xkb, modulep, module, sizeof (struct module)) !=
    820 	    sizeof (struct module))
    821 		return (0);
    822 
    823 	if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
    824 	    offsetof(Shdr, sh_addr), sym_addr))
    825 		return (0);
    826 
    827 	if (!xkb_read_word(xkb, (uintptr_t)module->strhdr +
    828 	    offsetof(Shdr, sh_addr), str_addr))
    829 		return (0);
    830 
    831 	if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
    832 	    offsetof(Shdr, sh_size), sym_count))
    833 		return (0);
    834 	*sym_count /= sizeof (Sym);
    835 
    836 	return (1);
    837 }
    838 
    839 static int
    840 xkb_read_modsyms(xkb_t *xkb, char **buf, size_t *sizes, int types,
    841     uintptr_t sym_addr, uintptr_t str_addr, uintptr_t sym_count)
    842 {
    843 	size_t i;
    844 
    845 	for (i = 0; i < sym_count; i++) {
    846 		Sym sym;
    847 		char *name;
    848 		size_t sz;
    849 		int type = XKB_WALK_GLOBAL;
    850 
    851 		if (xkb_read(xkb, sym_addr + i * sizeof (sym), &sym,
    852 		    sizeof (sym)) != sizeof (sym))
    853 			return (0);
    854 
    855 		if (GELF_ST_BIND(sym.st_info) == STB_LOCAL)
    856 			type = XKB_WALK_LOCAL;
    857 
    858 		name = xkb_readstr(xkb, str_addr + sym.st_name);
    859 
    860 		sym.st_shndx = SHN_ABS;
    861 		sym.st_name = sizes[XKB_WALK_STR];
    862 
    863 		sizes[type] += sizeof (sym);
    864 		sz = strlen(name) + 1;
    865 		sizes[XKB_WALK_STR] += sz;
    866 
    867 		if (buf != NULL) {
    868 			if (types & type) {
    869 				bcopy(&sym, *buf, sizeof (sym));
    870 				*buf += sizeof (sym);
    871 			}
    872 			if (types & XKB_WALK_STR) {
    873 				bcopy(name, *buf, sz);
    874 				*buf += sz;
    875 			}
    876 		}
    877 
    878 		mdb_free(name, 1024);
    879 	}
    880 
    881 	return (1);
    882 }
    883 
    884 static int
    885 xkb_walk_syms(xkb_t *xkb, uintptr_t modhead, char **buf,
    886     size_t *sizes, int types)
    887 {
    888 	uintptr_t modctl = modhead;
    889 	uintptr_t modulep;
    890 	struct module module;
    891 	uintptr_t sym_count;
    892 	uintptr_t sym_addr;
    893 	uintptr_t str_addr;
    894 	size_t max_iter = 500;
    895 
    896 	bzero(sizes, sizeof (*sizes) * (XKB_WALK_STR + 1));
    897 
    898 	/*
    899 	 * empty first symbol
    900 	 */
    901 	sizes[XKB_WALK_LOCAL] += sizeof (Sym);
    902 	sizes[XKB_WALK_STR] += 1;
    903 
    904 	if (buf != NULL) {
    905 		if (types & XKB_WALK_LOCAL) {
    906 			Sym tmp;
    907 			bzero(&tmp, sizeof (tmp));
    908 			bcopy(&tmp, *buf, sizeof (tmp));
    909 			*buf += sizeof (tmp);
    910 		}
    911 		if (types & XKB_WALK_STR) {
    912 			**buf = '\0';
    913 			(*buf)++;
    914 		}
    915 	}
    916 
    917 	for (;;) {
    918 		if (!xkb_read_word(xkb,
    919 		    modctl + offsetof(struct modctl, mod_mp), &modulep))
    920 			return (0);
    921 
    922 		if (modulep == NULL)
    923 			goto next;
    924 
    925 		if (!xkb_read_module(xkb, modulep, &module, &sym_addr,
    926 		    &sym_count, &str_addr))
    927 			return (0);
    928 
    929 		if ((module.flags & KOBJ_NOKSYMS))
    930 			goto next;
    931 
    932 		if (!xkb_read_modsyms(xkb, buf, sizes, types, sym_addr,
    933 		    str_addr, sym_count))
    934 			return (0);
    935 
    936 next:
    937 		if (!xkb_read_word(xkb,
    938 		    modctl + offsetof(struct modctl, mod_next), &modctl))
    939 			return (0);
    940 
    941 		if (modctl == modhead)
    942 			break;
    943 		/*
    944 		 * Try and prevent us looping forever if we have a broken list.
    945 		 */
    946 		if (--max_iter == 0)
    947 			break;
    948 	}
    949 
    950 	return (1);
    951 }
    952 
/*
 * Userspace equivalent of ksyms_snapshot().  Since we don't have a namelist
 * file for hypervisor images, we fabricate one here using code similar
 * to that of /dev/ksyms.
 *
 * The result is left in xkb_namelist (xkb_namesize bytes) and is laid
 * out as an xkb_namelist_t, followed by the local symbols, the global
 * symbols, and finally the string table.  Returns 1 on success and 0 on
 * failure.
 */
static int
xkb_build_ksyms(xkb_t *xkb)
{
	debug_info_t *info = &xkb->xkb_info;
	size_t sizes[XKB_WALK_STR + 1];
	xkb_namelist_t *hdr;
	char *buf;
	struct modctl modules;
	uintptr_t module;
	Shdr *shp;

	/*
	 * The ELF header of the namelist is copied from the module of
	 * the first modctl on the guest's list.
	 */
	if (xkb_read(xkb, info->di_modules, &modules,
	    sizeof (struct modctl)) != sizeof (struct modctl))
		return (0);

	module = (uintptr_t)modules.mod_mp;

	/*
	 * First pass, with no buffer: just find out how much space the
	 * symbols and strings are going to need.
	 */
	if (!xkb_walk_syms(xkb, info->di_modules, NULL, sizes,
	    XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR))
		return (0);

	xkb->xkb_namesize = sizeof (xkb_namelist_t);
	xkb->xkb_namesize += sizes[XKB_WALK_LOCAL];
	xkb->xkb_namesize += sizes[XKB_WALK_GLOBAL];
	xkb->xkb_namesize += sizes[XKB_WALK_STR];

	if ((xkb->xkb_namelist = mdb_zalloc(xkb->xkb_namesize, UM_SLEEP))
	    == NULL)
		return (0);

	/* LINTED - alignment */
	hdr = (xkb_namelist_t *)xkb->xkb_namelist;

	if (xkb_read(xkb, module + offsetof(struct module, hdr),
	    &hdr->kh_elf_hdr, sizeof (Ehdr)) != sizeof (Ehdr))
		return (0);

	/* Re-point the copied ELF header at our own headers. */
	hdr->kh_elf_hdr.e_phoff = offsetof(xkb_namelist_t, kh_text_phdr);
	hdr->kh_elf_hdr.e_shoff = offsetof(xkb_namelist_t, kh_shdr);
	hdr->kh_elf_hdr.e_phnum = 2;
	hdr->kh_elf_hdr.e_shnum = XKB_SHDR_NUM;
	hdr->kh_elf_hdr.e_shstrndx = XKB_SHDR_SHSTRTAB;

	/* Two program headers, for the text and data ranges in debug_info. */
	hdr->kh_text_phdr.p_type = PT_LOAD;
	hdr->kh_text_phdr.p_vaddr = (Addr)info->di_s_text;
	hdr->kh_text_phdr.p_memsz = (Word)(info->di_e_text - info->di_s_text);
	hdr->kh_text_phdr.p_flags = PF_R | PF_X;

	hdr->kh_data_phdr.p_type = PT_LOAD;
	hdr->kh_data_phdr.p_vaddr = (Addr)info->di_s_data;
	hdr->kh_data_phdr.p_memsz = (Word)(info->di_e_data - info->di_s_data);
	hdr->kh_data_phdr.p_flags = PF_R | PF_W | PF_X;

	/* The symbol table directly follows the xkb_namelist_t. */
	shp = &hdr->kh_shdr[XKB_SHDR_SYMTAB];
	shp->sh_name = 1;	/* xkb_shstrtab[1] = ".symtab" */
	shp->sh_type = SHT_SYMTAB;
	shp->sh_offset = sizeof (xkb_namelist_t);
	shp->sh_size = sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
	shp->sh_link = XKB_SHDR_STRTAB;
	/* sh_info is set to the number of local symbols */
	shp->sh_info = sizes[XKB_WALK_LOCAL] / sizeof (Sym);
	shp->sh_addralign = sizeof (Addr);
	shp->sh_entsize = sizeof (Sym);
	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);


	/* The string table follows the symbols. */
	shp = &hdr->kh_shdr[XKB_SHDR_STRTAB];
	shp->sh_name = 9;	/* xkb_shstrtab[9] = ".strtab" */
	shp->sh_type = SHT_STRTAB;
	shp->sh_offset = sizeof (xkb_namelist_t) +
	    sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
	shp->sh_size = sizes[XKB_WALK_STR];
	shp->sh_addralign = 1;
	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);


	/* The section name strings live inside the xkb_namelist_t itself. */
	shp = &hdr->kh_shdr[XKB_SHDR_SHSTRTAB];
	shp->sh_name = 17;	/* xkb_shstrtab[17] = ".shstrtab" */
	shp->sh_type = SHT_STRTAB;
	shp->sh_offset = offsetof(xkb_namelist_t, shstrings);
	shp->sh_size = sizeof (xkb_shstrtab);
	shp->sh_addralign = 1;
	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);

	bcopy(xkb_shstrtab, hdr->shstrings, sizeof (xkb_shstrtab));

	buf = xkb->xkb_namelist + sizeof (xkb_namelist_t);

	/*
	 * Three more passes, each appending one kind of data at 'buf':
	 * the local symbols, then the global symbols, then the strings.
	 * This must match the order of the offsets computed above.
	 */
	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
	    XKB_WALK_LOCAL))
		return (0);
	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
	    XKB_WALK_GLOBAL))
		return (0);
	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
	    XKB_WALK_STR))
		return (0);

	return (1);
}
   1057 
   1058 static xkb_t *
   1059 xkb_open_core(xkb_t *xkb)
   1060 {
   1061 	xkb_core_t *xc = &xkb->xkb_core;
   1062 	size_t sz;
   1063 	int i;
   1064 	struct vcpu_guest_context *vcp;
   1065 
   1066 	xkb->xkb_type = XKB_FORMAT_CORE;
   1067 
   1068 	if ((xkb->xkb_fd = open64(xkb->xkb_path, O_RDONLY)) == -1)
   1069 		return (xkb_fail(xkb, "cannot open %s", xkb->xkb_path));
   1070 
   1071 	if (pread64(xkb->xkb_fd, &xc->xc_hdr, sizeof (xc->xc_hdr), 0) !=
   1072 	    sizeof (xc->xc_hdr))
   1073 		return (xkb_fail(xkb, "invalid dump file"));
   1074 
   1075 	if (xc->xc_hdr.xch_magic == XC_CORE_MAGIC_HVM)
   1076 		return (xkb_fail(xkb, "cannot process HVM images"));
   1077 
   1078 	if (xc->xc_hdr.xch_magic != XC_CORE_MAGIC) {
   1079 		return (xkb_fail(xkb, "invalid magic %d",
   1080 		    xc->xc_hdr.xch_magic));
   1081 	}
   1082 
   1083 	/*
   1084 	 * With FORMAT_CORE, all pages are in the dump (non-existing
   1085 	 * ones are zeroed out).
   1086 	 */
   1087 	xkb->xkb_nr_pages = xc->xc_hdr.xch_nr_pages;
   1088 	xkb->xkb_pages_off = xc->xc_hdr.xch_pages_offset;
   1089 	xkb->xkb_max_pfn = xc->xc_hdr.xch_nr_pages - 1;
   1090 	xkb->xkb_nr_vcpus = xc->xc_hdr.xch_nr_vcpus;
   1091 
   1092 	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context);
   1093 	xkb->xkb_vcpu_data_sz = sz;
   1094 	xkb->xkb_vcpu_data = mdb_alloc(sz, UM_SLEEP);
   1095 
   1096 	if (pread64(xkb->xkb_fd, xkb->xkb_vcpu_data, sz,
   1097 	    xc->xc_hdr.xch_ctxt_offset) != sz)
   1098 		return (xkb_fail(xkb, "cannot read VCPU contexts"));
   1099 
   1100 	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context *);
   1101 	xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);
   1102 
   1103 	vcp = xkb->xkb_vcpu_data;
   1104 	for (i = 0; i < xkb->xkb_nr_vcpus; i++)
   1105 		xkb->xkb_vcpus[i] = &vcp[i];
   1106 
   1107 	/*
   1108 	 * Try to map all the data pages. If we can't, fall back to the
   1109 	 * window/pread() approach, which is significantly slower.
   1110 	 */
   1111 	xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
   1112 	    PROT_READ, MAP_SHARED, xkb->xkb_fd, xc->xc_hdr.xch_pages_offset);
   1113 
   1114 	if (xkb->xkb_pages == (char *)MAP_FAILED)
   1115 		xkb->xkb_pages = NULL;
   1116 
   1117 	/*
   1118 	 * We'd like to adapt for correctness' sake, but we have no way of
   1119 	 * detecting a PAE guest, since cr4 writes are disallowed.
   1120 	 */
   1121 	xkb->xkb_is_pae = 1;
   1122 
   1123 	if (!xkb_map_p2m(xkb))
   1124 		return (NULL);
   1125 
   1126 	return (xkb);
   1127 }
   1128 
   1129 static xkb_t *
   1130 xkb_open_elf(xkb_t *xkb)
   1131 {
   1132 	xkb_elf_t *xe = &xkb->xkb_elf;
   1133 	mdb_gelf_sect_t *sect;
   1134 	char *notes;
   1135 	char *pos;
   1136 	mdb_io_t *io;
   1137 	size_t sz;
   1138 	int i;
   1139 	void *dp;
   1140 
   1141 	if ((io = mdb_fdio_create_path(NULL, xkb->xkb_path,
   1142 	    O_RDONLY, 0)) == NULL)
   1143 		return (xkb_fail(xkb, "failed to open"));
   1144 
   1145 	xe->xe_gelf = mdb_gelf_create(io, ET_NONE, GF_FILE);
   1146 
   1147 	if (xe->xe_gelf == NULL) {
   1148 		mdb_io_destroy(io);
   1149 		return (xkb);
   1150 	}
   1151 
   1152 	xkb->xkb_fd = mdb_fdio_fileno(io);
   1153 
   1154 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".note.Xen");
   1155 
   1156 	if (sect == NULL)
   1157 		return (xkb);
   1158 
   1159 	if ((notes = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL)
   1160 		return (xkb);
   1161 
   1162 	/*
   1163 	 * Now we know this is indeed a hypervisor core dump, even if
   1164 	 * it's corrupted.
   1165 	 */
   1166 	xkb->xkb_type = XKB_FORMAT_ELF;
   1167 
   1168 	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
   1169 		/* LINTED - alignment */
   1170 		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
   1171 		uint64_t vers;
   1172 		char *desc;
   1173 		char *name;
   1174 
   1175 		name = pos + sizeof (*nhdr);
   1176 		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);
   1177 
   1178 		pos = desc + nhdr->n_descsz;
   1179 
   1180 		switch (nhdr->n_type) {
   1181 		case XEN_ELFNOTE_DUMPCORE_NONE:
   1182 			break;
   1183 
   1184 		case XEN_ELFNOTE_DUMPCORE_HEADER:
   1185 			if (nhdr->n_descsz != sizeof (struct xc_elf_header)) {
   1186 				return (xkb_fail(xkb, "invalid ELF note "
   1187 				    "XEN_ELFNOTE_DUMPCORE_HEADER\n"));
   1188 			}
   1189 
   1190 			bcopy(desc, &xe->xe_hdr,
   1191 			    sizeof (struct xc_elf_header));
   1192 			break;
   1193 
   1194 		case XEN_ELFNOTE_DUMPCORE_XEN_VERSION:
   1195 			if (nhdr->n_descsz < sizeof (struct xc_elf_version)) {
   1196 				return (xkb_fail(xkb, "invalid ELF note "
   1197 				    "XEN_ELFNOTE_DUMPCORE_XEN_VERSION\n"));
   1198 			}
   1199 
   1200 			bcopy(desc, &xe->xe_version,
   1201 			    sizeof (struct xc_elf_version));
   1202 			break;
   1203 
   1204 		case XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION:
   1205 			/* LINTED - alignment */
   1206 			vers = *((uint64_t *)desc);
   1207 			if ((vers >> 32) != 0) {
   1208 				return (xkb_fail(xkb, "unknown major "
   1209 				    "version %d (expected 0)\n",
   1210 				    (int)(vers >> 32)));
   1211 			}
   1212 
   1213 			if ((vers & 0xffffffff) != 1) {
   1214 				mdb_warn("unexpected dump minor number "
   1215 				    "version %d (expected 1)\n",
   1216 				    (int)(vers & 0xffffffff));
   1217 			}
   1218 			break;
   1219 
   1220 		default:
   1221 			mdb_warn("unknown ELF note %d(%s)\n",
   1222 			    nhdr->n_type, name);
   1223 			break;
   1224 		}
   1225 	}
   1226 
   1227 	xkb->xkb_is_hvm = xe->xe_hdr.xeh_magic == XC_CORE_MAGIC_HVM;
   1228 
   1229 	if (xe->xe_hdr.xeh_magic != XC_CORE_MAGIC &&
   1230 	    xe->xe_hdr.xeh_magic != XC_CORE_MAGIC_HVM) {
   1231 		return (xkb_fail(xkb, "invalid magic %d",
   1232 		    xe->xe_hdr.xeh_magic));
   1233 	}
   1234 
   1235 	xkb->xkb_nr_pages = xe->xe_hdr.xeh_nr_pages;
   1236 	xkb->xkb_is_pae = (strstr(xe->xe_version.xev_capabilities,
   1237 	    "x86_32p") != NULL);
   1238 
   1239 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_prstatus");
   1240 
   1241 	if (sect == NULL)
   1242 		return (xkb_fail(xkb, "cannot find section .xen_prstatus"));
   1243 
   1244 	if (sect->gs_shdr.sh_entsize < sizeof (vcpu_guest_context_t))
   1245 		return (xkb_fail(xkb, "invalid section .xen_prstatus"));
   1246 
   1247 	xkb->xkb_nr_vcpus = sect->gs_shdr.sh_size / sect->gs_shdr.sh_entsize;
   1248 
   1249 	xkb->xkb_vcpu_data = mdb_gelf_sect_load(xe->xe_gelf, sect);
   1250 	if (xkb->xkb_vcpu_data == NULL)
   1251 		return (xkb_fail(xkb, "cannot load section .xen_prstatus"));
   1252 	xkb->xkb_vcpu_data_sz = sect->gs_shdr.sh_size;
   1253 
   1254 	/*
   1255 	 * The vcpu_guest_context structures saved in the core file
   1256 	 * are actually unions of the 64-bit and 32-bit versions.
   1257 	 * Don't rely on the entry size to match the size of
   1258 	 * the structure, but set up an array of pointers.
   1259 	 */
   1260 	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context *);
   1261 	xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);
   1262 	for (i = 0; i < xkb->xkb_nr_vcpus; i++) {
   1263 		dp = ((char *)xkb->xkb_vcpu_data +
   1264 		    i * sect->gs_shdr.sh_entsize);
   1265 		xkb->xkb_vcpus[i] = dp;
   1266 	}
   1267 
   1268 	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pages");
   1269 
   1270 	if (sect == NULL)
   1271 		return (xkb_fail(xkb, "cannot find section .xen_pages"));
   1272 
   1273 	if (!PAGE_ALIGNED(sect->gs_shdr.sh_offset))
   1274 		return (xkb_fail(xkb, ".xen_pages is not page aligned"));
   1275 
   1276 	if (sect->gs_shdr.sh_entsize != PAGE_SIZE)
   1277 		return (xkb_fail(xkb, "invalid section .xen_pages"));
   1278 
   1279 	xkb->xkb_pages_off = sect->gs_shdr.sh_offset;
   1280 
   1281 	/*
   1282 	 * Try to map all the data pages. If we can't, fall back to the
   1283 	 * window/pread() approach, which is significantly slower.
   1284 	 */
   1285 	xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
   1286 	    PROT_READ, MAP_SHARED, xkb->xkb_fd, xkb->xkb_pages_off);
   1287 
   1288 	if (xkb->xkb_pages == (char *)MAP_FAILED)
   1289 		xkb->xkb_pages = NULL;
   1290 
   1291 	if (xkb->xkb_is_hvm) {
   1292 		if (!xkb_build_fake_p2m(xkb))
   1293 			return (NULL);
   1294 	} else {
   1295 		if (!xkb_build_p2m(xkb))
   1296 			return (NULL);
   1297 	}
   1298 
   1299 	return (xkb);
   1300 }
   1301 
   1302 static void
   1303 xkb_init_mmu(xkb_t *xkb)
   1304 {
   1305 #if defined(__amd64)
   1306 	xkb->xkb_mmu.mi_max = 3;
   1307 	xkb->xkb_mmu.mi_shift[0] = 12;
   1308 	xkb->xkb_mmu.mi_shift[1] = 21;
   1309 	xkb->xkb_mmu.mi_shift[2] = 30;
   1310 	xkb->xkb_mmu.mi_shift[3] = 39;
   1311 	xkb->xkb_mmu.mi_ptes = 512;
   1312 	xkb->xkb_mmu.mi_ptesize = 8;
   1313 #elif defined(__i386)
   1314 	if (xkb->xkb_is_pae) {
   1315 		xkb->xkb_mmu.mi_max = 2;
   1316 		xkb->xkb_mmu.mi_shift[0] = 12;
   1317 		xkb->xkb_mmu.mi_shift[1] = 21;
   1318 		xkb->xkb_mmu.mi_shift[2] = 30;
   1319 		xkb->xkb_mmu.mi_ptes = 512;
   1320 		xkb->xkb_mmu.mi_ptesize = 8;
   1321 	} else {
   1322 		xkb->xkb_mmu.mi_max = 1;
   1323 		xkb->xkb_mmu.mi_shift[0] = 12;
   1324 		xkb->xkb_mmu.mi_shift[1] = 22;
   1325 		xkb->xkb_mmu.mi_ptes = 1024;
   1326 		xkb->xkb_mmu.mi_ptesize = 4;
   1327 	}
   1328 #endif
   1329 }
   1330 
   1331 /*ARGSUSED*/
   1332 xkb_t *
   1333 xkb_open(const char *namelist, const char *corefile, const char *swapfile,
   1334     int flag, const char *err)
   1335 {
   1336 	uintptr_t debug_info = DEBUG_INFO;
   1337 	struct stat64 corestat;
   1338 	xkb_t *xkb = NULL;
   1339 	size_t i;
   1340 
   1341 	if (stat64(corefile, &corestat) == -1)
   1342 		return (xkb_fail(xkb, "cannot stat %s", corefile));
   1343 
   1344 	if (flag != O_RDONLY)
   1345 		return (xkb_fail(xkb, "invalid open flags"));
   1346 
   1347 	xkb = mdb_zalloc(sizeof (*xkb), UM_SLEEP);
   1348 
   1349 	for (i = 0; i < 4; i++) {
   1350 		xkb->xkb_pt_map[i].mm_mfn = MFN_INVALID;
   1351 		xkb->xkb_pt_map[i].mm_map = (char *)MAP_FAILED;
   1352 	}
   1353 
   1354 	xkb->xkb_type = XKB_FORMAT_UNKNOWN;
   1355 	xkb->xkb_map.mm_mfn = MFN_INVALID;
   1356 	xkb->xkb_map.mm_map = (char *)MAP_FAILED;
   1357 	xkb->xkb_core.xc_p2m_buf = (char *)MAP_FAILED;
   1358 	xkb->xkb_fd = -1;
   1359 
   1360 	xkb->xkb_path = strdup(corefile);
   1361 
   1362 	if ((xkb = xkb_open_elf(xkb)) == NULL)
   1363 		return (NULL);
   1364 
   1365 	if (xkb->xkb_type == XKB_FORMAT_UNKNOWN) {
   1366 		if (!xkb_open_core(xkb))
   1367 			return (NULL);
   1368 	}
   1369 
   1370 	xkb_init_mmu(xkb);
   1371 
   1372 	if (!xkb_build_m2p(xkb))
   1373 		return (NULL);
   1374 
   1375 	if (xkb->xkb_is_hvm)
   1376 		debug_info = DEBUG_INFO_HVM;
   1377 
   1378 	if (xkb_read(xkb, debug_info, &xkb->xkb_info,
   1379 	    sizeof (xkb->xkb_info)) != sizeof (xkb->xkb_info))
   1380 		return (xkb_fail(xkb, "cannot read debug_info"));
   1381 
   1382 	if (xkb->xkb_info.di_magic != DEBUG_INFO_MAGIC) {
   1383 		return (xkb_fail(xkb, "invalid debug info magic %d",
   1384 		    xkb->xkb_info.di_magic));
   1385 	}
   1386 
   1387 	if (xkb->xkb_info.di_version != DEBUG_INFO_VERSION) {
   1388 		return (xkb_fail(xkb, "unknown debug info version %d",
   1389 		    xkb->xkb_info.di_version));
   1390 	}
   1391 
   1392 	if (!xkb_build_ksyms(xkb))
   1393 		return (xkb_fail(xkb, "cannot construct namelist"));
   1394 
   1395 	return (xkb);
   1396 }
   1397 
   1398 int
   1399 xkb_close(xkb_t *xkb)
   1400 {
   1401 	size_t i, sz;
   1402 
   1403 	if (xkb == NULL)
   1404 		return (0);
   1405 
   1406 	if (xkb->xkb_m2p != NULL) {
   1407 		mdb_free(xkb->xkb_m2p,
   1408 		    (xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t));
   1409 	}
   1410 
   1411 	if (xkb->xkb_pages != NULL) {
   1412 		(void) munmap((void *)xkb->xkb_pages,
   1413 		    PAGE_SIZE * xkb->xkb_nr_pages);
   1414 	} else {
   1415 		for (i = 0; i < 4; i++) {
   1416 			char *addr = xkb->xkb_pt_map[i].mm_map;
   1417 			if (addr != (char *)MAP_FAILED)
   1418 				(void) munmap((void *)addr, PAGE_SIZE);
   1419 		}
   1420 		if (xkb->xkb_map.mm_map != (char *)MAP_FAILED) {
   1421 			(void) munmap((void *)xkb->xkb_map.mm_map,
   1422 			    PAGE_SIZE);
   1423 		}
   1424 	}
   1425 
   1426 	if (xkb->xkb_namelist != NULL)
   1427 		mdb_free(xkb->xkb_namelist, xkb->xkb_namesize);
   1428 
   1429 	if (xkb->xkb_type == XKB_FORMAT_ELF) {
   1430 		xkb_elf_t *xe = &xkb->xkb_elf;
   1431 
   1432 		if (xe->xe_gelf != NULL)
   1433 			mdb_gelf_destroy(xe->xe_gelf);
   1434 
   1435 		sz = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
   1436 
   1437 		if (xkb->xkb_p2m != NULL)
   1438 			mdb_free(xkb->xkb_p2m, sz);
   1439 
   1440 		sz = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
   1441 
   1442 		if (xe->xe_off != NULL)
   1443 			mdb_free(xe->xe_off, sz);
   1444 
   1445 	} else if (xkb->xkb_type == XKB_FORMAT_CORE) {
   1446 		xkb_core_t *xc = &xkb->xkb_core;
   1447 
   1448 		if (xkb->xkb_fd != -1)
   1449 			(void) close(xkb->xkb_fd);
   1450 
   1451 		sz = (xkb->xkb_nr_pages * sizeof (mfn_t)) + (PAGE_SIZE * 2);
   1452 		sz = PAGE_MASK(sz);
   1453 
   1454 		if (xc->xc_p2m_buf != (xen_pfn_t *)MAP_FAILED)
   1455 			(void) munmap(xc->xc_p2m_buf, sz);
   1456 
   1457 		if (xkb->xkb_vcpu_data != NULL)
   1458 			mdb_free(xkb->xkb_vcpu_data, xkb->xkb_vcpu_data_sz);
   1459 	}
   1460 
   1461 	if (xkb->xkb_vcpus != NULL) {
   1462 		sz = sizeof (struct vcpu_guest_context *) *
   1463 		    xkb->xkb_nr_vcpus;
   1464 		mdb_free(xkb->xkb_vcpus, sz);
   1465 	}
   1466 
   1467 	free(xkb->xkb_path);
   1468 
   1469 	mdb_free(xkb, sizeof (*xkb));
   1470 	return (0);
   1471 }
   1472 
   1473 /*ARGSUSED*/
   1474 static mdb_io_t *
   1475 xkb_sym_io(xkb_t *xkb, const char *symfile)
   1476 {
   1477 	mdb_io_t *io = mdb_memio_create(xkb->xkb_namelist, xkb->xkb_namesize);
   1478 
   1479 	if (io == NULL)
   1480 		mdb_warn("failed to create namelist from %s", xkb->xkb_path);
   1481 
   1482 	return (io);
   1483 }
   1484 
   1485 uint64_t
   1486 xkb_vtop(xkb_t *xkb, struct as *as, uintptr_t addr)
   1487 {
   1488 	mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
   1489 	mfn_t mfn;
   1490 
   1491 	if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
   1492 		return (-1ULL);
   1493 
   1494 	mfn = xkb_va_to_mfn(xkb, addr, tlmfn);
   1495 
   1496 	if (mfn == MFN_INVALID || mfn > xkb->xkb_max_mfn)
   1497 		return (-1ULL);
   1498 
   1499 	return (((uint64_t)xkb->xkb_m2p[mfn] << PAGE_SHIFT)
   1500 	    | PAGE_OFFSET(addr));
   1501 }
   1502 
   1503 static int
   1504 xkb_getmregs(xkb_t *xkb, uint_t cpu, struct privmregs *mregs)
   1505 {
   1506 	struct vcpu_guest_context *vcpu;
   1507 	struct cpu_user_regs *ur;
   1508 	struct regs *regs;
   1509 
   1510 	if (cpu >= xkb->xkb_nr_vcpus) {
   1511 		errno = EINVAL;
   1512 		return (-1);
   1513 	}
   1514 
   1515 	bzero(mregs, sizeof (*mregs));
   1516 
   1517 	vcpu = xkb->xkb_vcpus[cpu];
   1518 	ur = &vcpu->user_regs;
   1519 	regs = &mregs->pm_gregs;
   1520 
   1521 	regs->r_ss = ur->ss;
   1522 	regs->r_cs = ur->cs;
   1523 	regs->r_ds = ur->ds;
   1524 	regs->r_es = ur->es;
   1525 	regs->r_fs = ur->fs;
   1526 	regs->r_gs = ur->gs;
   1527 	regs->r_trapno = ur->entry_vector;
   1528 	regs->r_err = ur->error_code;
   1529 #ifdef __amd64
   1530 	regs->r_savfp = ur->rbp;
   1531 	regs->r_savpc = ur->rip;
   1532 	regs->r_rdi = ur->rdi;
   1533 	regs->r_rsi = ur->rsi;
   1534 	regs->r_rdx = ur->rdx;
   1535 	regs->r_rcx = ur->rcx;
   1536 	regs->r_r8 = ur->r8;
   1537 	regs->r_r9 = ur->r9;
   1538 	regs->r_rax = ur->rax;
   1539 	regs->r_rbx = ur->rbx;
   1540 	regs->r_rbp = ur->rbp;
   1541 	regs->r_r10 = ur->r10;
   1542 	regs->r_r11 = ur->r11;
   1543 	regs->r_r12 = ur->r12;
   1544 	regs->r_r13 = ur->r13;
   1545 	regs->r_r14 = ur->r14;
   1546 	regs->r_r15 = ur->r15;
   1547 	regs->r_rip = ur->rip;
   1548 	regs->r_rfl = ur->rflags;
   1549 	regs->r_rsp = ur->rsp;
   1550 #else
   1551 	regs->r_savfp = ur->ebp;
   1552 	regs->r_savpc = ur->eip;
   1553 	regs->r_edi = ur->edi;
   1554 	regs->r_esi = ur->esi;
   1555 	regs->r_ebp = ur->ebp;
   1556 	regs->r_esp = ur->esp;
   1557 	regs->r_ebx = ur->ebx;
   1558 	regs->r_edx = ur->edx;
   1559 	regs->r_ecx = ur->ecx;
   1560 	regs->r_eax = ur->eax;
   1561 	regs->r_eip = ur->eip;
   1562 	regs->r_efl = ur->eflags;
   1563 	regs->r_uesp = 0;
   1564 #endif
   1565 
   1566 	bcopy(&vcpu->ctrlreg, &mregs->pm_cr, 8 * sizeof (ulong_t));
   1567 	bcopy(&vcpu->debugreg, &mregs->pm_dr, 8 * sizeof (ulong_t));
   1568 
   1569 	mregs->pm_flags = PM_GREGS | PM_CRREGS | PM_DRREGS;
   1570 
   1571 	return (0);
   1572 }
   1573 
   1574 static mdb_kb_ops_t xpv_kb_ops = {
   1575 	.kb_open = (void *(*)())xkb_open,
   1576 	.kb_close = (int (*)())xkb_close,
   1577 	.kb_sym_io = (mdb_io_t *(*)())xkb_sym_io,
   1578 	.kb_kread = (ssize_t (*)())xkb_read,
   1579 	.kb_kwrite = (ssize_t (*)())mdb_tgt_notsup,
   1580 	.kb_aread = (ssize_t (*)())xkb_aread,
   1581 	.kb_awrite = (ssize_t (*)())mdb_tgt_notsup,
   1582 	.kb_pread = (ssize_t (*)())xkb_pread,
   1583 	.kb_pwrite = (ssize_t (*)())mdb_tgt_notsup,
   1584 	.kb_vtop = (uint64_t (*)())xkb_vtop,
   1585 	.kb_getmregs = (int (*)())xkb_getmregs
   1586 };
   1587 
   1588 mdb_kb_ops_t *
   1589 mdb_kb_ops(void)
   1590 {
   1591 	return (&xpv_kb_ops);
   1592 }
   1593 
   1594 static const mdb_dcmd_t dcmds[] = { NULL, };
   1595 static const mdb_walker_t walkers[] = { NULL, };
   1596 static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
   1597 
   1598 const mdb_modinfo_t *
   1599 _mdb_init(void)
   1600 {
   1601 	return (&modinfo);
   1602 }
   1603 
   1604 void
   1605 _mdb_fini(void)
   1606 {
   1607 }
   1608