Home | History | Annotate | Download | only in genunix
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <mdb/mdb_modapi.h>
     27 #include <sys/types.h>
     28 #include <vm/page.h>
     29 #include <sys/thread.h>
     30 #include <sys/swap.h>
     31 #include <sys/memlist.h>
     32 #if defined(__i386) || defined(__amd64)
     33 #include <sys/balloon_impl.h>
     34 #endif
     35 
     36 /*
     37  * Page walker.
     38  * By default, this will walk all pages in the system.  If given an
     39  * address, it will walk all pages belonging to the vnode at that
     40  * address.
     41  */
     42 
     43 /*
     44  * page_walk_data
     45  *
     46  * pw_hashleft is set to -1 when walking a vnode's pages, and holds the
     47  * number of hash locations remaining in the page hash table when
     48  * walking all pages.
     49  *
     50  * The astute reader will notice that pw_hashloc is only used when
     51  * reading all pages (to hold a pointer to our location in the page
     52  * hash table), and that pw_first is only used when reading the pages
     53  * belonging to a particular vnode (to hold a pointer to the first
     54  * page).  While these could be combined to be a single pointer, they
     55  * are left separate for clarity.
     56  */
typedef struct page_walk_data {
	/* hash buckets still to scan; -1 when walking one vnode's pages */
	long		pw_hashleft;
	/* next page hash bucket to read (all-pages walk only) */
	void		**pw_hashloc;
	/* first page visited, used to end a vnode walk; 0 until set */
	uintptr_t	pw_first;
} page_walk_data_t;
     62 
     63 int
     64 page_walk_init(mdb_walk_state_t *wsp)
     65 {
     66 	page_walk_data_t	*pwd;
     67 	void	**ptr;
     68 	size_t	hashsz;
     69 	vnode_t	vn;
     70 
     71 	if (wsp->walk_addr == NULL) {
     72 
     73 		/*
     74 		 * Walk all pages
     75 		 */
     76 
     77 		if ((mdb_readvar(&ptr, "page_hash") == -1) ||
     78 		    (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
     79 		    (ptr == NULL) || (hashsz == 0)) {
     80 			mdb_warn("page_hash, page_hashsz not found or invalid");
     81 			return (WALK_ERR);
     82 		}
     83 
     84 		/*
     85 		 * Since we are walking all pages, initialize hashleft
     86 		 * to be the remaining number of entries in the page
     87 		 * hash.  hashloc is set the start of the page hash
     88 		 * table.  Setting the walk address to 0 indicates that
     89 		 * we aren't currently following a hash chain, and that
     90 		 * we need to scan the page hash table for a page.
     91 		 */
     92 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
     93 		pwd->pw_hashleft = hashsz;
     94 		pwd->pw_hashloc = ptr;
     95 		wsp->walk_addr = 0;
     96 	} else {
     97 
     98 		/*
     99 		 * Walk just this vnode
    100 		 */
    101 
    102 		if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
    103 			mdb_warn("unable to read vnode_t at %#lx",
    104 			    wsp->walk_addr);
    105 			return (WALK_ERR);
    106 		}
    107 
    108 		/*
    109 		 * We set hashleft to -1 to indicate that we are
    110 		 * walking a vnode, and initialize first to 0 (it is
    111 		 * used to terminate the walk, so it must not be set
    112 		 * until after we have walked the first page).  The
    113 		 * walk address is set to the first page.
    114 		 */
    115 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
    116 		pwd->pw_hashleft = -1;
    117 		pwd->pw_first = 0;
    118 
    119 		wsp->walk_addr = (uintptr_t)vn.v_pages;
    120 	}
    121 
    122 	wsp->walk_data = pwd;
    123 
    124 	return (WALK_NEXT);
    125 }
    126 
    127 int
    128 page_walk_step(mdb_walk_state_t *wsp)
    129 {
    130 	page_walk_data_t	*pwd = wsp->walk_data;
    131 	page_t		page;
    132 	uintptr_t	pp;
    133 
    134 	pp = wsp->walk_addr;
    135 
    136 	if (pwd->pw_hashleft < 0) {
    137 
    138 		/* We're walking a vnode's pages */
    139 
    140 		/*
    141 		 * If we don't have any pages to walk, we have come
    142 		 * back around to the first one (we finished), or we
    143 		 * can't read the page we're looking at, we are done.
    144 		 */
    145 		if (pp == NULL || pp == pwd->pw_first)
    146 			return (WALK_DONE);
    147 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
    148 			mdb_warn("unable to read page_t at %#lx", pp);
    149 			return (WALK_ERR);
    150 		}
    151 
    152 		/*
    153 		 * Set the walk address to the next page, and if the
    154 		 * first page hasn't been set yet (i.e. we are on the
    155 		 * first page), set it.
    156 		 */
    157 		wsp->walk_addr = (uintptr_t)page.p_vpnext;
    158 		if (pwd->pw_first == NULL)
    159 			pwd->pw_first = pp;
    160 
    161 	} else if (pwd->pw_hashleft > 0) {
    162 
    163 		/* We're walking all pages */
    164 
    165 		/*
    166 		 * If pp (the walk address) is NULL, we scan through
    167 		 * the page hash table until we find a page.
    168 		 */
    169 		if (pp == NULL) {
    170 
    171 			/*
    172 			 * Iterate through the page hash table until we
    173 			 * find a page or reach the end.
    174 			 */
    175 			do {
    176 				if (mdb_vread(&pp, sizeof (uintptr_t),
    177 				    (uintptr_t)pwd->pw_hashloc) == -1) {
    178 					mdb_warn("unable to read from %#p",
    179 					    pwd->pw_hashloc);
    180 					return (WALK_ERR);
    181 				}
    182 				pwd->pw_hashleft--;
    183 				pwd->pw_hashloc++;
    184 			} while (pwd->pw_hashleft && (pp == NULL));
    185 
    186 			/*
    187 			 * We've reached the end; exit.
    188 			 */
    189 			if (pp == NULL)
    190 				return (WALK_DONE);
    191 		}
    192 
    193 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
    194 			mdb_warn("unable to read page_t at %#lx", pp);
    195 			return (WALK_ERR);
    196 		}
    197 
    198 		/*
    199 		 * Set the walk address to the next page.
    200 		 */
    201 		wsp->walk_addr = (uintptr_t)page.p_hash;
    202 
    203 	} else {
    204 		/* We've finished walking all pages. */
    205 		return (WALK_DONE);
    206 	}
    207 
    208 	return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
    209 }
    210 
    211 void
    212 page_walk_fini(mdb_walk_state_t *wsp)
    213 {
    214 	mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
    215 }
    216 
    217 /*
    218  * allpages walks all pages in the system in order they appear in
    219  * the memseg structure
    220  */
    221 
    222 #define	PAGE_BUFFER	128
    223 
    224 int
    225 allpages_walk_init(mdb_walk_state_t *wsp)
    226 {
    227 	if (wsp->walk_addr != 0) {
    228 		mdb_warn("allpages only supports global walks.\n");
    229 		return (WALK_ERR);
    230 	}
    231 
    232 	if (mdb_layered_walk("memseg", wsp) == -1) {
    233 		mdb_warn("couldn't walk 'memseg'");
    234 		return (WALK_ERR);
    235 	}
    236 
    237 	wsp->walk_data = mdb_alloc(sizeof (page_t) * PAGE_BUFFER, UM_SLEEP);
    238 	return (WALK_NEXT);
    239 }
    240 
    241 int
    242 allpages_walk_step(mdb_walk_state_t *wsp)
    243 {
    244 	const struct memseg *msp = wsp->walk_layer;
    245 	page_t *buf = wsp->walk_data;
    246 	size_t pg_read, i;
    247 	size_t pg_num = msp->pages_end - msp->pages_base;
    248 	const page_t *pg_addr = msp->pages;
    249 
    250 	while (pg_num > 0) {
    251 		pg_read = MIN(pg_num, PAGE_BUFFER);
    252 
    253 		if (mdb_vread(buf, pg_read * sizeof (page_t),
    254 		    (uintptr_t)pg_addr) == -1) {
    255 			mdb_warn("can't read page_t's at %#lx", pg_addr);
    256 			return (WALK_ERR);
    257 		}
    258 		for (i = 0; i < pg_read; i++) {
    259 			int ret = wsp->walk_callback((uintptr_t)&pg_addr[i],
    260 			    &buf[i], wsp->walk_cbdata);
    261 
    262 			if (ret != WALK_NEXT)
    263 				return (ret);
    264 		}
    265 		pg_num -= pg_read;
    266 		pg_addr += pg_read;
    267 	}
    268 
    269 	return (WALK_NEXT);
    270 }
    271 
    272 void
    273 allpages_walk_fini(mdb_walk_state_t *wsp)
    274 {
    275 	mdb_free(wsp->walk_data, sizeof (page_t) * PAGE_BUFFER);
    276 }
    277 
/*
 * Hash table + LRU queue.
 * This table is used to cache recently read vnodes for the memstat
 * command, to reduce the number of mdb_vread calls.  This greatly
 * speeds the memstat command on live, large CPU count systems.
 */
    284 
/* Number of cache entries used when the larger allocation fails */
#define	VN_SMALL	401
/* Smallest number of cache entries that is attempted first */
#define	VN_LARGE	10007
/* Hash bucket index for vnode address p in hash table hp */
#define	VN_HTABLE_KEY(p, hp)	((p) % ((hp)->vn_htable_buckets))
    288 
/*
 * One cached vnode.  Each entry is linked on the LRU queue through
 * vn_q_next/vn_q_prev and, once it holds a vnode, on a hash chain through
 * vn_h_next.
 */
struct vn_htable_list {
	uint_t vn_flag;				/* v_flag from vnode	*/
	uintptr_t vn_ptr;			/* pointer to vnode	*/
	struct vn_htable_list *vn_q_next;	/* queue next pointer	*/
	struct vn_htable_list *vn_q_prev;	/* queue prev pointer	*/
	struct vn_htable_list *vn_h_next;	/* hash table pointer	*/
};
    296 
/*
 * vn_q_first        -> points to the head of the queue: the vnode that was
 *                      most recently used
 * vn_q_last         -> points to the least recently used vnode; its entry is
 *                      reused when a new vnode is read.
 * vn_htable         -> hash table
 * vn_htable_buf     -> contains htable objects
 * vn_htable_size    -> total number of items in the hash table
 * vn_htable_buckets -> number of buckets in the hash table
 */
typedef struct vn_htable {
	struct vn_htable_list  *vn_q_first;
	struct vn_htable_list  *vn_q_last;
	struct vn_htable_list **vn_htable;
	struct vn_htable_list  *vn_htable_buf;
	int vn_htable_size;
	int vn_htable_buckets;
} vn_htable_t;
    315 
    316 
    317 /* allocate memory, initilize hash table and LRU queue */
    318 static void
    319 vn_htable_init(vn_htable_t *hp, size_t vn_size)
    320 {
    321 	int i;
    322 	int htable_size = MAX(vn_size, VN_LARGE);
    323 
    324 	if ((hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
    325 	    * htable_size, UM_NOSLEEP|UM_GC)) == NULL) {
    326 		htable_size = VN_SMALL;
    327 		hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
    328 		    * htable_size, UM_SLEEP|UM_GC);
    329 	}
    330 
    331 	hp->vn_htable = mdb_zalloc(sizeof (struct vn_htable_list *)
    332 	    * htable_size, UM_SLEEP|UM_GC);
    333 
    334 	hp->vn_q_first  = &hp->vn_htable_buf[0];
    335 	hp->vn_q_last   = &hp->vn_htable_buf[htable_size - 1];
    336 	hp->vn_q_first->vn_q_next = &hp->vn_htable_buf[1];
    337 	hp->vn_q_last->vn_q_prev = &hp->vn_htable_buf[htable_size - 2];
    338 
    339 	for (i = 1; i < (htable_size-1); i++) {
    340 		hp->vn_htable_buf[i].vn_q_next = &hp->vn_htable_buf[i + 1];
    341 		hp->vn_htable_buf[i].vn_q_prev = &hp->vn_htable_buf[i - 1];
    342 	}
    343 
    344 	hp->vn_htable_size = htable_size;
    345 	hp->vn_htable_buckets = htable_size;
    346 }
    347 
    348 
    349 /*
    350  * Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
    351  * The function tries to find needed information in the following order:
    352  *
    353  * 1. check if ptr is the first in queue
    354  * 2. check if ptr is in hash table (if so move it to the top of queue)
    355  * 3. do mdb_vread, remove last queue item from queue and hash table.
    356  *    Insert new information to freed object, and put this object in to the
    357  *    top of the queue.
    358  */
    359 static int
    360 vn_get(vn_htable_t *hp, struct vnode *vp, uintptr_t ptr)
    361 {
    362 	int hkey;
    363 	struct vn_htable_list *hent, **htmp, *q_next, *q_prev;
    364 	struct vn_htable_list  *q_first = hp->vn_q_first;
    365 
    366 	/* 1. vnode ptr is the first in queue, just get v_flag and return */
    367 	if (q_first->vn_ptr == ptr) {
    368 		vp->v_flag = q_first->vn_flag;
    369 
    370 		return (0);
    371 	}
    372 
    373 	/* 2. search the hash table for this ptr */
    374 	hkey = VN_HTABLE_KEY(ptr, hp);
    375 	hent = hp->vn_htable[hkey];
    376 	while (hent && (hent->vn_ptr != ptr))
    377 		hent = hent->vn_h_next;
    378 
    379 	/* 3. if hent is NULL, we did not find in hash table, do mdb_vread */
    380 	if (hent == NULL) {
    381 		struct vnode vn;
    382 
    383 		if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
    384 			mdb_warn("unable to read vnode_t at %#lx", ptr);
    385 			return (-1);
    386 		}
    387 
    388 		/* we will insert read data into the last element in queue */
    389 		hent = hp->vn_q_last;
    390 
    391 		/* remove last hp->vn_q_last object from hash table */
    392 		if (hent->vn_ptr) {
    393 			htmp = &hp->vn_htable[VN_HTABLE_KEY(hent->vn_ptr, hp)];
    394 			while (*htmp != hent)
    395 				htmp = &(*htmp)->vn_h_next;
    396 			*htmp = hent->vn_h_next;
    397 		}
    398 
    399 		/* insert data into new free object */
    400 		hent->vn_ptr  = ptr;
    401 		hent->vn_flag = vn.v_flag;
    402 
    403 		/* insert new object into hash table */
    404 		hent->vn_h_next = hp->vn_htable[hkey];
    405 		hp->vn_htable[hkey] = hent;
    406 	}
    407 
    408 	/* Remove from queue. hent is not first, vn_q_prev is not NULL */
    409 	q_next = hent->vn_q_next;
    410 	q_prev = hent->vn_q_prev;
    411 	if (q_next == NULL)
    412 		hp->vn_q_last = q_prev;
    413 	else
    414 		q_next->vn_q_prev = q_prev;
    415 	q_prev->vn_q_next = q_next;
    416 
    417 	/* Add to the front of queue */
    418 	hent->vn_q_prev = NULL;
    419 	hent->vn_q_next = q_first;
    420 	q_first->vn_q_prev = hent;
    421 	hp->vn_q_first = hent;
    422 
    423 	/* Set v_flag in vnode pointer from hent */
    424 	vp->v_flag = hent->vn_flag;
    425 
    426 	return (0);
    427 }
    428 
/*
 * Summary statistics of pages.  One of these is filled in by
 * memstat_callback() as the pages are walked and is then reported by the
 * memstat dcmd.
 */
typedef struct memstat {
	struct vnode    *ms_kvp;	/* Cached address of kernel vnode */
	struct vnode    *ms_unused_vp;	/* Unused pages vnode pointer	  */
	struct vnode    *ms_zvp;	/* Cached address of zio vnode    */
	uint64_t	ms_kmem;	/* Pages of kernel memory	  */
	uint64_t	ms_zfs_data;	/* Pages of zfs data		  */
	uint64_t	ms_anon;	/* Pages of anonymous memory	  */
	uint64_t	ms_vnode;	/* Pages of named (vnode) memory  */
	uint64_t	ms_exec;	/* Pages of exec/library memory	  */
	uint64_t	ms_cachelist;	/* Pages on the cachelist (free)  */
	uint64_t	ms_total;	/* Pages on page hash		  */
	vn_htable_t	*ms_vn_htable;	/* Pointer to hash table	  */
	struct vnode	ms_vn;		/* vnode buffer			  */
} memstat_t;

/* True if page pp belongs to the kernel vnode cached in stats */
#define	MS_PP_ISKAS(pp, stats)				\
	((pp)->p_vnode == (stats)->ms_kvp)

/* True if a zio vnode is known and page pp belongs to it */
#define	MS_PP_ISZFS_DATA(pp, stats)			\
	(((stats)->ms_zvp != NULL) && ((pp)->p_vnode == (stats)->ms_zvp))
    450 
    451 /*
    452  * Summarize pages by type and update stat information
    453  */
    454 
    455 /* ARGSUSED */
    456 static int
    457 memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
    458 {
    459 	struct vnode *vp = &stats->ms_vn;
    460 
    461 	if (pp->p_vnode == NULL || pp->p_vnode == stats->ms_unused_vp)
    462 		return (WALK_NEXT);
    463 	else if (MS_PP_ISKAS(pp, stats))
    464 		stats->ms_kmem++;
    465 	else if (MS_PP_ISZFS_DATA(pp, stats))
    466 		stats->ms_zfs_data++;
    467 	else if (PP_ISFREE(pp))
    468 		stats->ms_cachelist++;
    469 	else if (vn_get(stats->ms_vn_htable, vp, (uintptr_t)pp->p_vnode))
    470 		return (WALK_ERR);
    471 	else if (IS_SWAPFSVP(vp))
    472 		stats->ms_anon++;
    473 	else if ((vp->v_flag & VVMEXEC) != 0)
    474 		stats->ms_exec++;
    475 	else
    476 		stats->ms_vnode++;
    477 
    478 	stats->ms_total++;
    479 
    480 	return (WALK_NEXT);
    481 }
    482 
    483 /* ARGSUSED */
    484 int
    485 memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
    486 {
    487 	ulong_t pagesize;
    488 	pgcnt_t total_pages, physmem;
    489 	ulong_t freemem;
    490 	memstat_t stats;
    491 	GElf_Sym sym;
    492 	vn_htable_t ht;
    493 	uintptr_t vn_size = 0;
    494 #if defined(__i386) || defined(__amd64)
    495 	bln_stats_t bln_stats;
    496 	ssize_t bln_size;
    497 #endif
    498 
    499 	bzero(&stats, sizeof (memstat_t));
    500 
    501 	/*
    502 	 * -s size, is an internal option. It specifies the size of vn_htable.
    503 	 * Hash table size is set in the following order:
    504 	 * If user has specified the size that is larger than VN_LARGE: try it,
    505 	 * but if malloc failed default to VN_SMALL. Otherwise try VN_LARGE, if
    506 	 * failed to allocate default to VN_SMALL.
    507 	 * For a better efficiency of hash table it is highly recommended to
    508 	 * set size to a prime number.
    509 	 */
    510 	if ((flags & DCMD_ADDRSPEC) || mdb_getopts(argc, argv,
    511 	    's', MDB_OPT_UINTPTR, &vn_size, NULL) != argc)
    512 		return (DCMD_USAGE);
    513 
    514 	/* Initialize vnode hash list and queue */
    515 	vn_htable_init(&ht, vn_size);
    516 	stats.ms_vn_htable = &ht;
    517 
    518 	/* Grab base page size */
    519 	if (mdb_readvar(&pagesize, "_pagesize") == -1) {
    520 		mdb_warn("unable to read _pagesize");
    521 		return (DCMD_ERR);
    522 	}
    523 
    524 	/* Total physical memory */
    525 	if (mdb_readvar(&total_pages, "total_pages") == -1) {
    526 		mdb_warn("unable to read total_pages");
    527 		return (DCMD_ERR);
    528 	}
    529 
    530 	/* Artificially limited memory */
    531 	if (mdb_readvar(&physmem, "physmem") == -1) {
    532 		mdb_warn("unable to read physmem");
    533 		return (DCMD_ERR);
    534 	}
    535 
    536 	/* read kernel vnode pointer */
    537 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvp",
    538 	    (GElf_Sym *)&sym) == -1) {
    539 		mdb_warn("unable to read kvp");
    540 		return (DCMD_ERR);
    541 	}
    542 
    543 	stats.ms_kvp = (struct vnode *)(uintptr_t)sym.st_value;
    544 
    545 	/*
    546 	 * Read the zio vnode pointer.  It may not exist on all kernels, so it
    547 	 * it isn't found, it's not a fatal error.
    548 	 */
    549 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "zvp",
    550 	    (GElf_Sym *)&sym) == -1) {
    551 		stats.ms_zvp = NULL;
    552 	} else {
    553 		stats.ms_zvp = (struct vnode *)(uintptr_t)sym.st_value;
    554 	}
    555 
    556 	/*
    557 	 * If physmem != total_pages, then the administrator has limited the
    558 	 * number of pages available in the system.  Excluded pages are
    559 	 * associated with the unused pages vnode.  Read this vnode so the
    560 	 * pages can be excluded in the page accounting.
    561 	 */
    562 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
    563 	    (GElf_Sym *)&sym) == -1) {
    564 		mdb_warn("unable to read unused_pages_vp");
    565 		return (DCMD_ERR);
    566 	}
    567 	stats.ms_unused_vp = (struct vnode *)(uintptr_t)sym.st_value;
    568 
    569 	/* walk all pages, collect statistics */
    570 	if (mdb_walk("allpages", (mdb_walk_cb_t)memstat_callback,
    571 	    &stats) == -1) {
    572 		mdb_warn("can't walk memseg");
    573 		return (DCMD_ERR);
    574 	}
    575 
    576 #define	MS_PCT_TOTAL(x)	((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \
    577 		((physmem) * 10)))
    578 
    579 	mdb_printf("Page Summary                Pages                MB"
    580 	    "  %%Tot\n");
    581 	mdb_printf("------------     ----------------  ----------------"
    582 	    "  ----\n");
    583 	mdb_printf("Kernel           %16llu  %16llu  %3lu%%\n",
    584 	    stats.ms_kmem,
    585 	    (uint64_t)stats.ms_kmem * pagesize / (1024 * 1024),
    586 	    MS_PCT_TOTAL(stats.ms_kmem));
    587 
    588 	if (stats.ms_zfs_data != 0)
    589 		mdb_printf("ZFS File Data    %16llu  %16llu  %3lu%%\n",
    590 		    stats.ms_zfs_data,
    591 		    (uint64_t)stats.ms_zfs_data * pagesize / (1024 * 1024),
    592 		    MS_PCT_TOTAL(stats.ms_zfs_data));
    593 
    594 	mdb_printf("Anon             %16llu  %16llu  %3lu%%\n",
    595 	    stats.ms_anon,
    596 	    (uint64_t)stats.ms_anon * pagesize / (1024 * 1024),
    597 	    MS_PCT_TOTAL(stats.ms_anon));
    598 	mdb_printf("Exec and libs    %16llu  %16llu  %3lu%%\n",
    599 	    stats.ms_exec,
    600 	    (uint64_t)stats.ms_exec * pagesize / (1024 * 1024),
    601 	    MS_PCT_TOTAL(stats.ms_exec));
    602 	mdb_printf("Page cache       %16llu  %16llu  %3lu%%\n",
    603 	    stats.ms_vnode,
    604 	    (uint64_t)stats.ms_vnode * pagesize / (1024 * 1024),
    605 	    MS_PCT_TOTAL(stats.ms_vnode));
    606 	mdb_printf("Free (cachelist) %16llu  %16llu  %3lu%%\n",
    607 	    stats.ms_cachelist,
    608 	    (uint64_t)stats.ms_cachelist * pagesize / (1024 * 1024),
    609 	    MS_PCT_TOTAL(stats.ms_cachelist));
    610 
    611 	/*
    612 	 * occasionally, we double count pages above.  To avoid printing
    613 	 * absurdly large values for freemem, we clamp it at zero.
    614 	 */
    615 	if (physmem > stats.ms_total)
    616 		freemem = physmem - stats.ms_total;
    617 	else
    618 		freemem = 0;
    619 
    620 #if defined(__i386) || defined(__amd64)
    621 	/* Are we running under Xen?  If so, get balloon memory usage. */
    622 	if ((bln_size = mdb_readvar(&bln_stats, "bln_stats")) != -1) {
    623 		if (freemem > bln_stats.bln_hv_pages)
    624 			freemem -= bln_stats.bln_hv_pages;
    625 		else
    626 			freemem = 0;
    627 	}
    628 #endif
    629 
    630 	mdb_printf("Free (freelist)  %16lu  %16llu  %3lu%%\n", freemem,
    631 	    (uint64_t)freemem * pagesize / (1024 * 1024),
    632 	    MS_PCT_TOTAL(freemem));
    633 
    634 #if defined(__i386) || defined(__amd64)
    635 	if (bln_size != -1) {
    636 		mdb_printf("Balloon          %16lu  %16llu  %3lu%%\n",
    637 		    bln_stats.bln_hv_pages,
    638 		    (uint64_t)bln_stats.bln_hv_pages * pagesize / (1024 * 1024),
    639 		    MS_PCT_TOTAL(bln_stats.bln_hv_pages));
    640 	}
    641 #endif
    642 
    643 	mdb_printf("\nTotal            %16lu  %16lu\n",
    644 	    physmem,
    645 	    (uint64_t)physmem * pagesize / (1024 * 1024));
    646 
    647 	if (physmem != total_pages) {
    648 		mdb_printf("Physical         %16lu  %16lu\n",
    649 		    total_pages,
    650 		    (uint64_t)total_pages * pagesize / (1024 * 1024));
    651 	}
    652 
    653 #undef MS_PCT_TOTAL
    654 
    655 	return (DCMD_OK);
    656 }
    657 
    658 int
    659 page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
    660 {
    661 	page_t	p;
    662 
    663 	if (!(flags & DCMD_ADDRSPEC)) {
    664 		if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
    665 			mdb_warn("can't walk pages");
    666 			return (DCMD_ERR);
    667 		}
    668 		return (DCMD_OK);
    669 	}
    670 
    671 	if (DCMD_HDRSPEC(flags)) {
    672 		mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
    673 		    "PAGE", "VNODE", "OFFSET", "SELOCK",
    674 		    "LCT", "COW", "IO", "FS", "ST");
    675 	}
    676 
    677 	if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
    678 		mdb_warn("can't read page_t at %#lx", addr);
    679 		return (DCMD_ERR);
    680 	}
    681 
    682 	mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
    683 	    addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
    684 	    p.p_iolock_state, p.p_fsdata, p.p_state);
    685 
    686 	return (DCMD_OK);
    687 }
    688 
    689 int
    690 swap_walk_init(mdb_walk_state_t *wsp)
    691 {
    692 	void	*ptr;
    693 
    694 	if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
    695 		mdb_warn("swapinfo not found or invalid");
    696 		return (WALK_ERR);
    697 	}
    698 
    699 	wsp->walk_addr = (uintptr_t)ptr;
    700 
    701 	return (WALK_NEXT);
    702 }
    703 
    704 int
    705 swap_walk_step(mdb_walk_state_t *wsp)
    706 {
    707 	uintptr_t	sip;
    708 	struct swapinfo	si;
    709 
    710 	sip = wsp->walk_addr;
    711 
    712 	if (sip == NULL)
    713 		return (WALK_DONE);
    714 
    715 	if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
    716 		mdb_warn("unable to read swapinfo at %#lx", sip);
    717 		return (WALK_ERR);
    718 	}
    719 
    720 	wsp->walk_addr = (uintptr_t)si.si_next;
    721 
    722 	return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
    723 }
    724 
    725 int
    726 swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
    727 {
    728 	struct swapinfo	si;
    729 	char		*name;
    730 
    731 	if (!(flags & DCMD_ADDRSPEC)) {
    732 		if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
    733 			mdb_warn("can't walk swapinfo");
    734 			return (DCMD_ERR);
    735 		}
    736 		return (DCMD_OK);
    737 	}
    738 
    739 	if (DCMD_HDRSPEC(flags)) {
    740 		mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
    741 		    "ADDR", "VNODE", "PAGES", "FREE", "NAME");
    742 	}
    743 
    744 	if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
    745 		mdb_warn("can't read swapinfo at %#lx", addr);
    746 		return (DCMD_ERR);
    747 	}
    748 
    749 	name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
    750 	if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
    751 		name = "*error*";
    752 
    753 	mdb_printf("%0?lx %?p %9d %9d %s\n",
    754 	    addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);
    755 
    756 	return (DCMD_OK);
    757 }
    758 
    759 int
    760 memlist_walk_step(mdb_walk_state_t *wsp)
    761 {
    762 	uintptr_t	mlp;
    763 	struct memlist	ml;
    764 
    765 	mlp = wsp->walk_addr;
    766 
    767 	if (mlp == NULL)
    768 		return (WALK_DONE);
    769 
    770 	if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
    771 		mdb_warn("unable to read memlist at %#lx", mlp);
    772 		return (WALK_ERR);
    773 	}
    774 
    775 	wsp->walk_addr = (uintptr_t)ml.next;
    776 
    777 	return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
    778 }
    779 
    780 int
    781 memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
    782 {
    783 	struct memlist	ml;
    784 
    785 	if (!(flags & DCMD_ADDRSPEC)) {
    786 		uintptr_t ptr;
    787 		uint_t list = 0;
    788 		int i;
    789 		static const char *lists[] = {
    790 			"phys_install",
    791 			"phys_avail",
    792 			"virt_avail"
    793 		};
    794 
    795 		if (mdb_getopts(argc, argv,
    796 		    'i', MDB_OPT_SETBITS, (1 << 0), &list,
    797 		    'a', MDB_OPT_SETBITS, (1 << 1), &list,
    798 		    'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
    799 			return (DCMD_USAGE);
    800 
    801 		if (!list)
    802 			list = 1;
    803 
    804 		for (i = 0; list; i++, list >>= 1) {
    805 			if (!(list & 1))
    806 				continue;
    807 			if ((mdb_readvar(&ptr, lists[i]) == -1) ||
    808 			    (ptr == NULL)) {
    809 				mdb_warn("%s not found or invalid", lists[i]);
    810 				return (DCMD_ERR);
    811 			}
    812 
    813 			mdb_printf("%s:\n", lists[i]);
    814 			if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
    815 			    ptr) == -1) {
    816 				mdb_warn("can't walk memlist");
    817 				return (DCMD_ERR);
    818 			}
    819 		}
    820 		return (DCMD_OK);
    821 	}
    822 
    823 	if (DCMD_HDRSPEC(flags))
    824 		mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");
    825 
    826 	if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
    827 		mdb_warn("can't read memlist at %#lx", addr);
    828 		return (DCMD_ERR);
    829 	}
    830 
    831 	mdb_printf("%0?lx %16llx %16llx\n", addr, ml.address, ml.size);
    832 
    833 	return (DCMD_OK);
    834 }
    835