Home | History | Annotate | Download | only in genunix
      1      0    stevel /*
      2      0    stevel  * CDDL HEADER START
      3      0    stevel  *
      4      0    stevel  * The contents of this file are subject to the terms of the
      5   3290  johansen  * Common Development and Distribution License (the "License").
      6   3290  johansen  * You may not use this file except in compliance with the License.
      7      0    stevel  *
      8      0    stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9      0    stevel  * or http://www.opensolaris.org/os/licensing.
     10      0    stevel  * See the License for the specific language governing permissions
     11      0    stevel  * and limitations under the License.
     12      0    stevel  *
     13      0    stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14      0    stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15      0    stevel  * If applicable, add the following below this CDDL HEADER, with the
     16      0    stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17      0    stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18      0    stevel  *
     19      0    stevel  * CDDL HEADER END
     20      0    stevel  */
     21      0    stevel /*
     22   9894     Pavel  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23      0    stevel  * Use is subject to license terms.
     24      0    stevel  */
     25      0    stevel 
     26      0    stevel #include <mdb/mdb_modapi.h>
     27      0    stevel #include <sys/types.h>
     28      0    stevel #include <vm/page.h>
     29      0    stevel #include <sys/thread.h>
     30      0    stevel #include <sys/swap.h>
     31      0    stevel #include <sys/memlist.h>
     32  11185      Sean #include <sys/vnode.h>
     33   5084   johnlev #if defined(__i386) || defined(__amd64)
     34   5084   johnlev #include <sys/balloon_impl.h>
     35   5084   johnlev #endif
     36      0    stevel 
     37      0    stevel /*
     38      0    stevel  * Page walker.
     39      0    stevel  * By default, this will walk all pages in the system.  If given an
     40      0    stevel  * address, it will walk all pages belonging to the vnode at that
     41      0    stevel  * address.
     42      0    stevel  */
     43      0    stevel 
     44      0    stevel /*
     45      0    stevel  * page_walk_data
     46      0    stevel  *
     47      0    stevel  * pw_hashleft is set to -1 when walking a vnode's pages, and holds the
     48      0    stevel  * number of hash locations remaining in the page hash table when
     49      0    stevel  * walking all pages.
     50      0    stevel  *
     51      0    stevel  * The astute reader will notice that pw_hashloc is only used when
     52      0    stevel  * reading all pages (to hold a pointer to our location in the page
     53      0    stevel  * hash table), and that pw_first is only used when reading the pages
     54      0    stevel  * belonging to a particular vnode (to hold a pointer to the first
     55      0    stevel  * page).  While these could be combined to be a single pointer, they
     56      0    stevel  * are left separate for clarity.
     57      0    stevel  */
typedef struct page_walk_data {
	/* hash buckets left to scan; -1 when walking a single vnode's pages */
	long		pw_hashleft;
	/* target address of the next page hash bucket to read (all-pages walk) */
	void		**pw_hashloc;
	/* first page visited in a vnode walk; 0 until that page has been seen */
	uintptr_t	pw_first;
} page_walk_data_t;
     63      0    stevel 
     64      0    stevel int
     65      0    stevel page_walk_init(mdb_walk_state_t *wsp)
     66      0    stevel {
     67      0    stevel 	page_walk_data_t	*pwd;
     68      0    stevel 	void	**ptr;
     69      0    stevel 	size_t	hashsz;
     70      0    stevel 	vnode_t	vn;
     71      0    stevel 
     72      0    stevel 	if (wsp->walk_addr == NULL) {
     73      0    stevel 
     74      0    stevel 		/*
     75      0    stevel 		 * Walk all pages
     76      0    stevel 		 */
     77      0    stevel 
     78      0    stevel 		if ((mdb_readvar(&ptr, "page_hash") == -1) ||
     79      0    stevel 		    (mdb_readvar(&hashsz, "page_hashsz") == -1) ||
     80      0    stevel 		    (ptr == NULL) || (hashsz == 0)) {
     81      0    stevel 			mdb_warn("page_hash, page_hashsz not found or invalid");
     82      0    stevel 			return (WALK_ERR);
     83      0    stevel 		}
     84      0    stevel 
     85      0    stevel 		/*
     86      0    stevel 		 * Since we are walking all pages, initialize hashleft
     87      0    stevel 		 * to be the remaining number of entries in the page
     88      0    stevel 		 * hash.  hashloc is set the start of the page hash
     89      0    stevel 		 * table.  Setting the walk address to 0 indicates that
     90      0    stevel 		 * we aren't currently following a hash chain, and that
     91      0    stevel 		 * we need to scan the page hash table for a page.
     92      0    stevel 		 */
     93      0    stevel 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
     94      0    stevel 		pwd->pw_hashleft = hashsz;
     95      0    stevel 		pwd->pw_hashloc = ptr;
     96      0    stevel 		wsp->walk_addr = 0;
     97      0    stevel 	} else {
     98      0    stevel 
     99      0    stevel 		/*
    100      0    stevel 		 * Walk just this vnode
    101      0    stevel 		 */
    102      0    stevel 
    103      0    stevel 		if (mdb_vread(&vn, sizeof (vnode_t), wsp->walk_addr) == -1) {
    104      0    stevel 			mdb_warn("unable to read vnode_t at %#lx",
    105      0    stevel 			    wsp->walk_addr);
    106      0    stevel 			return (WALK_ERR);
    107      0    stevel 		}
    108      0    stevel 
    109      0    stevel 		/*
    110      0    stevel 		 * We set hashleft to -1 to indicate that we are
    111      0    stevel 		 * walking a vnode, and initialize first to 0 (it is
    112      0    stevel 		 * used to terminate the walk, so it must not be set
    113      0    stevel 		 * until after we have walked the first page).  The
    114      0    stevel 		 * walk address is set to the first page.
    115      0    stevel 		 */
    116      0    stevel 		pwd = mdb_alloc(sizeof (page_walk_data_t), UM_SLEEP);
    117      0    stevel 		pwd->pw_hashleft = -1;
    118      0    stevel 		pwd->pw_first = 0;
    119      0    stevel 
    120      0    stevel 		wsp->walk_addr = (uintptr_t)vn.v_pages;
    121      0    stevel 	}
    122      0    stevel 
    123      0    stevel 	wsp->walk_data = pwd;
    124      0    stevel 
    125      0    stevel 	return (WALK_NEXT);
    126      0    stevel }
    127      0    stevel 
    128      0    stevel int
    129      0    stevel page_walk_step(mdb_walk_state_t *wsp)
    130      0    stevel {
    131      0    stevel 	page_walk_data_t	*pwd = wsp->walk_data;
    132      0    stevel 	page_t		page;
    133      0    stevel 	uintptr_t	pp;
    134      0    stevel 
    135      0    stevel 	pp = wsp->walk_addr;
    136      0    stevel 
    137      0    stevel 	if (pwd->pw_hashleft < 0) {
    138      0    stevel 
    139      0    stevel 		/* We're walking a vnode's pages */
    140      0    stevel 
    141      0    stevel 		/*
    142      0    stevel 		 * If we don't have any pages to walk, we have come
    143      0    stevel 		 * back around to the first one (we finished), or we
    144      0    stevel 		 * can't read the page we're looking at, we are done.
    145      0    stevel 		 */
    146      0    stevel 		if (pp == NULL || pp == pwd->pw_first)
    147      0    stevel 			return (WALK_DONE);
    148      0    stevel 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
    149      0    stevel 			mdb_warn("unable to read page_t at %#lx", pp);
    150      0    stevel 			return (WALK_ERR);
    151      0    stevel 		}
    152      0    stevel 
    153      0    stevel 		/*
    154      0    stevel 		 * Set the walk address to the next page, and if the
    155      0    stevel 		 * first page hasn't been set yet (i.e. we are on the
    156      0    stevel 		 * first page), set it.
    157      0    stevel 		 */
    158      0    stevel 		wsp->walk_addr = (uintptr_t)page.p_vpnext;
    159      0    stevel 		if (pwd->pw_first == NULL)
    160      0    stevel 			pwd->pw_first = pp;
    161      0    stevel 
    162      0    stevel 	} else if (pwd->pw_hashleft > 0) {
    163      0    stevel 
    164      0    stevel 		/* We're walking all pages */
    165      0    stevel 
    166      0    stevel 		/*
    167      0    stevel 		 * If pp (the walk address) is NULL, we scan through
    168      0    stevel 		 * the page hash table until we find a page.
    169      0    stevel 		 */
    170      0    stevel 		if (pp == NULL) {
    171      0    stevel 
    172      0    stevel 			/*
    173      0    stevel 			 * Iterate through the page hash table until we
    174      0    stevel 			 * find a page or reach the end.
    175      0    stevel 			 */
    176      0    stevel 			do {
    177      0    stevel 				if (mdb_vread(&pp, sizeof (uintptr_t),
    178      0    stevel 				    (uintptr_t)pwd->pw_hashloc) == -1) {
    179      0    stevel 					mdb_warn("unable to read from %#p",
    180      0    stevel 					    pwd->pw_hashloc);
    181      0    stevel 					return (WALK_ERR);
    182      0    stevel 				}
    183      0    stevel 				pwd->pw_hashleft--;
    184      0    stevel 				pwd->pw_hashloc++;
    185      0    stevel 			} while (pwd->pw_hashleft && (pp == NULL));
    186      0    stevel 
    187      0    stevel 			/*
    188      0    stevel 			 * We've reached the end; exit.
    189      0    stevel 			 */
    190      0    stevel 			if (pp == NULL)
    191      0    stevel 				return (WALK_DONE);
    192      0    stevel 		}
    193      0    stevel 
    194      0    stevel 		if (mdb_vread(&page, sizeof (page_t), pp) == -1) {
    195      0    stevel 			mdb_warn("unable to read page_t at %#lx", pp);
    196      0    stevel 			return (WALK_ERR);
    197      0    stevel 		}
    198      0    stevel 
    199      0    stevel 		/*
    200      0    stevel 		 * Set the walk address to the next page.
    201      0    stevel 		 */
    202      0    stevel 		wsp->walk_addr = (uintptr_t)page.p_hash;
    203      0    stevel 
    204      0    stevel 	} else {
    205      0    stevel 		/* We've finished walking all pages. */
    206      0    stevel 		return (WALK_DONE);
    207      0    stevel 	}
    208      0    stevel 
    209      0    stevel 	return (wsp->walk_callback(pp, &page, wsp->walk_cbdata));
    210      0    stevel }
    211      0    stevel 
    212      0    stevel void
    213      0    stevel page_walk_fini(mdb_walk_state_t *wsp)
    214      0    stevel {
    215      0    stevel 	mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
    216      0    stevel }
    217      0    stevel 
    218   9894     Pavel /*
    219   9894     Pavel  * allpages walks all pages in the system in order they appear in
    220   9894     Pavel  * the memseg structure
    221   9894     Pavel  */
    222   9894     Pavel 
/* Number of page_t structures the allpages walker reads per mdb_vread() */
#define	PAGE_BUFFER	128
    224   9894     Pavel 
    225   9894     Pavel int
    226   9894     Pavel allpages_walk_init(mdb_walk_state_t *wsp)
    227   9894     Pavel {
    228   9894     Pavel 	if (wsp->walk_addr != 0) {
    229   9894     Pavel 		mdb_warn("allpages only supports global walks.\n");
    230   9894     Pavel 		return (WALK_ERR);
    231   9894     Pavel 	}
    232   9894     Pavel 
    233   9894     Pavel 	if (mdb_layered_walk("memseg", wsp) == -1) {
    234   9894     Pavel 		mdb_warn("couldn't walk 'memseg'");
    235   9894     Pavel 		return (WALK_ERR);
    236   9894     Pavel 	}
    237   9894     Pavel 
    238   9894     Pavel 	wsp->walk_data = mdb_alloc(sizeof (page_t) * PAGE_BUFFER, UM_SLEEP);
    239   9894     Pavel 	return (WALK_NEXT);
    240   9894     Pavel }
    241   9894     Pavel 
    242   9894     Pavel int
    243   9894     Pavel allpages_walk_step(mdb_walk_state_t *wsp)
    244   9894     Pavel {
    245   9894     Pavel 	const struct memseg *msp = wsp->walk_layer;
    246   9894     Pavel 	page_t *buf = wsp->walk_data;
    247   9894     Pavel 	size_t pg_read, i;
    248   9894     Pavel 	size_t pg_num = msp->pages_end - msp->pages_base;
    249   9894     Pavel 	const page_t *pg_addr = msp->pages;
    250   9894     Pavel 
    251   9894     Pavel 	while (pg_num > 0) {
    252   9894     Pavel 		pg_read = MIN(pg_num, PAGE_BUFFER);
    253   9894     Pavel 
    254   9894     Pavel 		if (mdb_vread(buf, pg_read * sizeof (page_t),
    255   9894     Pavel 		    (uintptr_t)pg_addr) == -1) {
    256   9894     Pavel 			mdb_warn("can't read page_t's at %#lx", pg_addr);
    257   9894     Pavel 			return (WALK_ERR);
    258   9894     Pavel 		}
    259   9894     Pavel 		for (i = 0; i < pg_read; i++) {
    260   9894     Pavel 			int ret = wsp->walk_callback((uintptr_t)&pg_addr[i],
    261   9894     Pavel 			    &buf[i], wsp->walk_cbdata);
    262   9894     Pavel 
    263   9894     Pavel 			if (ret != WALK_NEXT)
    264   9894     Pavel 				return (ret);
    265   9894     Pavel 		}
    266   9894     Pavel 		pg_num -= pg_read;
    267   9894     Pavel 		pg_addr += pg_read;
    268   9894     Pavel 	}
    269   9894     Pavel 
    270   9894     Pavel 	return (WALK_NEXT);
    271   9894     Pavel }
    272   9894     Pavel 
    273   9894     Pavel void
    274   9894     Pavel allpages_walk_fini(mdb_walk_state_t *wsp)
    275   9894     Pavel {
    276   9894     Pavel 	mdb_free(wsp->walk_data, sizeof (page_t) * PAGE_BUFFER);
    277   9894     Pavel }
    278   9894     Pavel 
    279   9894     Pavel /*
    280   9894     Pavel  * Hash table + LRU queue.
    281   9894     Pavel  * This table is used to cache recently read vnodes for the memstat
    282   9894     Pavel  * command, to reduce the number of mdb_vread calls.  This greatly
     283   9894     Pavel  * speeds the memstat command on live, large CPU count systems.
    284   9894     Pavel  */
    285   9894     Pavel 
/* Entry count used when the larger allocation cannot be satisfied */
#define	VN_SMALL	401
/* Smallest entry count attempted first; larger requests override it */
#define	VN_LARGE	10007
/* Hash bucket index for vnode address p in table hp */
#define	VN_HTABLE_KEY(p, hp)	((p) % ((hp)->vn_htable_buckets))
    289   9894     Pavel 
/*
 * One cached vnode.  Each entry is linked both into the LRU queue
 * (vn_q_next / vn_q_prev) and into a hash chain (vn_h_next).  An entry
 * whose vn_ptr is 0 has not been filled in and is not on any hash chain.
 */
struct vn_htable_list {
	uint_t vn_flag;				/* v_flag from vnode	*/
	uintptr_t vn_ptr;			/* pointer to vnode	*/
	struct vn_htable_list *vn_q_next;	/* queue next pointer	*/
	struct vn_htable_list *vn_q_prev;	/* queue prev pointer	*/
	struct vn_htable_list *vn_h_next;	/* hash table pointer	*/
};
    297   9894     Pavel 
    298   9894     Pavel /*
     299   9894     Pavel  * vn_q_first        -> points to the head of queue: the vnode that was most
    300   9894     Pavel  *                      recently used
    301   9894     Pavel  * vn_q_last         -> points to the oldest used vnode, and is freed once a new
    302   9894     Pavel  *                      vnode is read.
    303   9894     Pavel  * vn_htable         -> hash table
    304   9894     Pavel  * vn_htable_buf     -> contains htable objects
    305   9894     Pavel  * vn_htable_size    -> total number of items in the hash table
    306   9894     Pavel  * vn_htable_buckets -> number of buckets in the hash table
    307   9894     Pavel  */
typedef struct vn_htable {
	struct vn_htable_list  *vn_q_first;	/* most recently used entry */
	struct vn_htable_list  *vn_q_last;	/* next entry to be reused */
	struct vn_htable_list **vn_htable;	/* array of hash chain heads */
	struct vn_htable_list  *vn_htable_buf;	/* backing array of entries */
	int vn_htable_size;	/* entry count; set equal to vn_htable_buckets */
	int vn_htable_buckets;	/* modulus used by VN_HTABLE_KEY */
} vn_htable_t;
    316   9894     Pavel 
    317   9894     Pavel 
    318   9894     Pavel /* allocate memory, initilize hash table and LRU queue */
    319   9894     Pavel static void
    320   9894     Pavel vn_htable_init(vn_htable_t *hp, size_t vn_size)
    321   9894     Pavel {
    322   9894     Pavel 	int i;
    323   9894     Pavel 	int htable_size = MAX(vn_size, VN_LARGE);
    324   9894     Pavel 
    325   9894     Pavel 	if ((hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
    326   9894     Pavel 	    * htable_size, UM_NOSLEEP|UM_GC)) == NULL) {
    327   9894     Pavel 		htable_size = VN_SMALL;
    328   9894     Pavel 		hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
    329   9894     Pavel 		    * htable_size, UM_SLEEP|UM_GC);
    330   9894     Pavel 	}
    331   9894     Pavel 
    332   9894     Pavel 	hp->vn_htable = mdb_zalloc(sizeof (struct vn_htable_list *)
    333   9894     Pavel 	    * htable_size, UM_SLEEP|UM_GC);
    334   9894     Pavel 
    335   9894     Pavel 	hp->vn_q_first  = &hp->vn_htable_buf[0];
    336   9894     Pavel 	hp->vn_q_last   = &hp->vn_htable_buf[htable_size - 1];
    337   9894     Pavel 	hp->vn_q_first->vn_q_next = &hp->vn_htable_buf[1];
    338   9894     Pavel 	hp->vn_q_last->vn_q_prev = &hp->vn_htable_buf[htable_size - 2];
    339   9894     Pavel 
    340   9894     Pavel 	for (i = 1; i < (htable_size-1); i++) {
    341   9894     Pavel 		hp->vn_htable_buf[i].vn_q_next = &hp->vn_htable_buf[i + 1];
    342   9894     Pavel 		hp->vn_htable_buf[i].vn_q_prev = &hp->vn_htable_buf[i - 1];
    343   9894     Pavel 	}
    344   9894     Pavel 
    345   9894     Pavel 	hp->vn_htable_size = htable_size;
    346   9894     Pavel 	hp->vn_htable_buckets = htable_size;
    347   9894     Pavel }
    348   9894     Pavel 
    349   9894     Pavel 
    350   9894     Pavel /*
    351   9894     Pavel  * Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
    352   9894     Pavel  * The function tries to find needed information in the following order:
    353   9894     Pavel  *
    354   9894     Pavel  * 1. check if ptr is the first in queue
    355   9894     Pavel  * 2. check if ptr is in hash table (if so move it to the top of queue)
    356   9894     Pavel  * 3. do mdb_vread, remove last queue item from queue and hash table.
    357   9894     Pavel  *    Insert new information to freed object, and put this object in to the
    358   9894     Pavel  *    top of the queue.
    359   9894     Pavel  */
    360   9894     Pavel static int
    361   9894     Pavel vn_get(vn_htable_t *hp, struct vnode *vp, uintptr_t ptr)
    362   9894     Pavel {
    363   9894     Pavel 	int hkey;
    364   9894     Pavel 	struct vn_htable_list *hent, **htmp, *q_next, *q_prev;
    365   9894     Pavel 	struct vn_htable_list  *q_first = hp->vn_q_first;
    366   9894     Pavel 
    367   9894     Pavel 	/* 1. vnode ptr is the first in queue, just get v_flag and return */
    368   9894     Pavel 	if (q_first->vn_ptr == ptr) {
    369   9894     Pavel 		vp->v_flag = q_first->vn_flag;
    370   9894     Pavel 
    371   9894     Pavel 		return (0);
    372   9894     Pavel 	}
    373   9894     Pavel 
    374   9894     Pavel 	/* 2. search the hash table for this ptr */
    375   9894     Pavel 	hkey = VN_HTABLE_KEY(ptr, hp);
    376   9894     Pavel 	hent = hp->vn_htable[hkey];
    377   9894     Pavel 	while (hent && (hent->vn_ptr != ptr))
    378   9894     Pavel 		hent = hent->vn_h_next;
    379   9894     Pavel 
    380   9894     Pavel 	/* 3. if hent is NULL, we did not find in hash table, do mdb_vread */
    381   9894     Pavel 	if (hent == NULL) {
    382   9894     Pavel 		struct vnode vn;
    383   9894     Pavel 
    384   9894     Pavel 		if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
    385   9894     Pavel 			mdb_warn("unable to read vnode_t at %#lx", ptr);
    386   9894     Pavel 			return (-1);
    387   9894     Pavel 		}
    388   9894     Pavel 
    389   9894     Pavel 		/* we will insert read data into the last element in queue */
    390   9894     Pavel 		hent = hp->vn_q_last;
    391   9894     Pavel 
    392   9894     Pavel 		/* remove last hp->vn_q_last object from hash table */
    393   9894     Pavel 		if (hent->vn_ptr) {
    394   9894     Pavel 			htmp = &hp->vn_htable[VN_HTABLE_KEY(hent->vn_ptr, hp)];
    395   9894     Pavel 			while (*htmp != hent)
    396   9894     Pavel 				htmp = &(*htmp)->vn_h_next;
    397   9894     Pavel 			*htmp = hent->vn_h_next;
    398   9894     Pavel 		}
    399   9894     Pavel 
    400   9894     Pavel 		/* insert data into new free object */
    401   9894     Pavel 		hent->vn_ptr  = ptr;
    402   9894     Pavel 		hent->vn_flag = vn.v_flag;
    403   9894     Pavel 
    404   9894     Pavel 		/* insert new object into hash table */
    405   9894     Pavel 		hent->vn_h_next = hp->vn_htable[hkey];
    406   9894     Pavel 		hp->vn_htable[hkey] = hent;
    407   9894     Pavel 	}
    408   9894     Pavel 
    409   9894     Pavel 	/* Remove from queue. hent is not first, vn_q_prev is not NULL */
    410   9894     Pavel 	q_next = hent->vn_q_next;
    411   9894     Pavel 	q_prev = hent->vn_q_prev;
    412   9894     Pavel 	if (q_next == NULL)
    413   9894     Pavel 		hp->vn_q_last = q_prev;
    414   9894     Pavel 	else
    415   9894     Pavel 		q_next->vn_q_prev = q_prev;
    416   9894     Pavel 	q_prev->vn_q_next = q_next;
    417   9894     Pavel 
    418   9894     Pavel 	/* Add to the front of queue */
    419   9894     Pavel 	hent->vn_q_prev = NULL;
    420   9894     Pavel 	hent->vn_q_next = q_first;
    421   9894     Pavel 	q_first->vn_q_prev = hent;
    422   9894     Pavel 	hp->vn_q_first = hent;
    423   9894     Pavel 
    424   9894     Pavel 	/* Set v_flag in vnode pointer from hent */
    425   9894     Pavel 	vp->v_flag = hent->vn_flag;
    426   9894     Pavel 
    427   9894     Pavel 	return (0);
    428   9894     Pavel }
    429   9894     Pavel 
/*
 * Summary statistics of pages, filled in by memstat_callback() as it is
 * invoked for each page.
 */
typedef struct memstat {
	struct vnode    *ms_kvp;	/* Cached address of kernel vnode */
	struct vnode    *ms_unused_vp;	/* Unused pages vnode pointer	  */
	struct vnode    *ms_zvp;	/* Cached address of zio vnode    */
	uint64_t	ms_kmem;	/* Pages of kernel memory	  */
	uint64_t	ms_zfs_data;	/* Pages of zfs data		  */
	uint64_t	ms_anon;	/* Pages of anonymous memory	  */
	uint64_t	ms_vnode;	/* Pages of named (vnode) memory  */
	uint64_t	ms_exec;	/* Pages of exec/library memory	  */
	uint64_t	ms_cachelist;	/* Pages on the cachelist (free)  */
	uint64_t	ms_total;	/* Sum of all the counters above  */
	vn_htable_t	*ms_vn_htable;	/* Pointer to hash table	  */
	struct vnode	ms_vn;		/* Scratch vnode; vn_get() stores */
					/* the looked-up v_flag in it	  */
} memstat_t;
    445   3290  johansen 
/* True if the page's vnode is the cached kernel vnode (stats->ms_kvp) */
#define	MS_PP_ISKAS(pp, stats)				\
	((pp)->p_vnode == (stats)->ms_kvp)

/*
 * True if the page's vnode is the cached zio vnode (stats->ms_zvp); always
 * false when no zio vnode address has been recorded.
 */
#define	MS_PP_ISZFS_DATA(pp, stats)			\
	(((stats)->ms_zvp != NULL) && ((pp)->p_vnode == (stats)->ms_zvp))
    451      0    stevel 
    452      0    stevel /*
    453   9894     Pavel  * Summarize pages by type and update stat information
    454      0    stevel  */
    455      0    stevel 
    456      0    stevel /* ARGSUSED */
    457      0    stevel static int
    458      0    stevel memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
    459      0    stevel {
    460   9894     Pavel 	struct vnode *vp = &stats->ms_vn;
    461      0    stevel 
    462   9894     Pavel 	if (pp->p_vnode == NULL || pp->p_vnode == stats->ms_unused_vp)
    463   9894     Pavel 		return (WALK_NEXT);
    464   9894     Pavel 	else if (MS_PP_ISKAS(pp, stats))
    465   9894     Pavel 		stats->ms_kmem++;
    466   7315  Jonathan 	else if (MS_PP_ISZFS_DATA(pp, stats))
    467   7315  Jonathan 		stats->ms_zfs_data++;
    468   9894     Pavel 	else if (PP_ISFREE(pp))
    469   9894     Pavel 		stats->ms_cachelist++;
    470   9894     Pavel 	else if (vn_get(stats->ms_vn_htable, vp, (uintptr_t)pp->p_vnode))
    471   9894     Pavel 		return (WALK_ERR);
    472   9894     Pavel 	else if (IS_SWAPFSVP(vp))
    473   9894     Pavel 		stats->ms_anon++;
    474   9894     Pavel 	else if ((vp->v_flag & VVMEXEC) != 0)
    475      0    stevel 		stats->ms_exec++;
    476      0    stevel 	else
    477      0    stevel 		stats->ms_vnode++;
    478      0    stevel 
    479      0    stevel 	stats->ms_total++;
    480      0    stevel 
    481      0    stevel 	return (WALK_NEXT);
    482      0    stevel }
    483      0    stevel 
    484      0    stevel /* ARGSUSED */
    485      0    stevel int
    486      0    stevel memstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
    487      0    stevel {
    488      0    stevel 	ulong_t pagesize;
    489   7180   jwadams 	pgcnt_t total_pages, physmem;
    490   7180   jwadams 	ulong_t freemem;
    491      0    stevel 	memstat_t stats;
    492      0    stevel 	GElf_Sym sym;
    493   9894     Pavel 	vn_htable_t ht;
    494  11185      Sean 	struct vnode *kvps;
    495   9894     Pavel 	uintptr_t vn_size = 0;
    496   5084   johnlev #if defined(__i386) || defined(__amd64)
    497   5084   johnlev 	bln_stats_t bln_stats;
    498   5084   johnlev 	ssize_t bln_size;
    499   5084   johnlev #endif
    500      0    stevel 
    501      0    stevel 	bzero(&stats, sizeof (memstat_t));
    502      0    stevel 
    503   9894     Pavel 	/*
    504   9894     Pavel 	 * -s size, is an internal option. It specifies the size of vn_htable.
    505   9894     Pavel 	 * Hash table size is set in the following order:
    506   9894     Pavel 	 * If user has specified the size that is larger than VN_LARGE: try it,
    507   9894     Pavel 	 * but if malloc failed default to VN_SMALL. Otherwise try VN_LARGE, if
    508   9894     Pavel 	 * failed to allocate default to VN_SMALL.
    509   9894     Pavel 	 * For a better efficiency of hash table it is highly recommended to
    510   9894     Pavel 	 * set size to a prime number.
    511   9894     Pavel 	 */
    512   9894     Pavel 	if ((flags & DCMD_ADDRSPEC) || mdb_getopts(argc, argv,
    513   9894     Pavel 	    's', MDB_OPT_UINTPTR, &vn_size, NULL) != argc)
    514      0    stevel 		return (DCMD_USAGE);
    515   9894     Pavel 
    516   9894     Pavel 	/* Initialize vnode hash list and queue */
    517   9894     Pavel 	vn_htable_init(&ht, vn_size);
    518   9894     Pavel 	stats.ms_vn_htable = &ht;
    519      0    stevel 
    520      0    stevel 	/* Grab base page size */
    521      0    stevel 	if (mdb_readvar(&pagesize, "_pagesize") == -1) {
    522      0    stevel 		mdb_warn("unable to read _pagesize");
    523      0    stevel 		return (DCMD_ERR);
    524      0    stevel 	}
    525      0    stevel 
    526      0    stevel 	/* Total physical memory */
    527      0    stevel 	if (mdb_readvar(&total_pages, "total_pages") == -1) {
    528      0    stevel 		mdb_warn("unable to read total_pages");
    529      0    stevel 		return (DCMD_ERR);
    530      0    stevel 	}
    531      0    stevel 
    532      0    stevel 	/* Artificially limited memory */
    533      0    stevel 	if (mdb_readvar(&physmem, "physmem") == -1) {
    534      0    stevel 		mdb_warn("unable to read physmem");
    535      0    stevel 		return (DCMD_ERR);
    536      0    stevel 	}
    537      0    stevel 
    538  11185      Sean 	/* read kernel vnode array pointer */
    539  11185      Sean 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "kvps",
    540   5084   johnlev 	    (GElf_Sym *)&sym) == -1) {
    541  11185      Sean 		mdb_warn("unable to read kvps");
    542      0    stevel 		return (DCMD_ERR);
    543      0    stevel 	}
    544  11185      Sean 	kvps = (struct vnode *)(uintptr_t)sym.st_value;
    545  11185      Sean 	stats.ms_kvp =  &kvps[KV_KVP];
    546   3290  johansen 
    547   3290  johansen 	/*
    548  11185      Sean 	 * Read the zio vnode pointer.
    549   3290  johansen 	 */
    550  11185      Sean 	stats.ms_zvp = &kvps[KV_ZVP];
    551      0    stevel 
    552   9894     Pavel 	/*
    553   9894     Pavel 	 * If physmem != total_pages, then the administrator has limited the
    554   9894     Pavel 	 * number of pages available in the system.  Excluded pages are
    555   9894     Pavel 	 * associated with the unused pages vnode.  Read this vnode so the
    556   9894     Pavel 	 * pages can be excluded in the page accounting.
    557   9894     Pavel 	 */
    558      0    stevel 	if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
    559   5084   johnlev 	    (GElf_Sym *)&sym) == -1) {
    560      0    stevel 		mdb_warn("unable to read unused_pages_vp");
    561      0    stevel 		return (DCMD_ERR);
    562      0    stevel 	}
    563   9894     Pavel 	stats.ms_unused_vp = (struct vnode *)(uintptr_t)sym.st_value;
    564      0    stevel 
    565   9894     Pavel 	/* walk all pages, collect statistics */
    566   9894     Pavel 	if (mdb_walk("allpages", (mdb_walk_cb_t)memstat_callback,
    567   9894     Pavel 	    &stats) == -1) {
    568   9894     Pavel 		mdb_warn("can't walk memseg");
    569      0    stevel 		return (DCMD_ERR);
    570      0    stevel 	}
    571      0    stevel 
    572   7180   jwadams #define	MS_PCT_TOTAL(x)	((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \
    573   7180   jwadams 		((physmem) * 10)))
    574      0    stevel 
    575      0    stevel 	mdb_printf("Page Summary                Pages                MB"
    576      0    stevel 	    "  %%Tot\n");
    577      0    stevel 	mdb_printf("------------     ----------------  ----------------"
    578   5084   johnlev 	    "  ----\n");
    579   7180   jwadams 	mdb_printf("Kernel           %16llu  %16llu  %3lu%%\n",
    580      0    stevel 	    stats.ms_kmem,
    581      0    stevel 	    (uint64_t)stats.ms_kmem * pagesize / (1024 * 1024),
    582      0    stevel 	    MS_PCT_TOTAL(stats.ms_kmem));
    583   7315  Jonathan 
    584   7315  Jonathan 	if (stats.ms_zfs_data != 0)
    585   7315  Jonathan 		mdb_printf("ZFS File Data    %16llu  %16llu  %3lu%%\n",
    586   7315  Jonathan 		    stats.ms_zfs_data,
    587   7315  Jonathan 		    (uint64_t)stats.ms_zfs_data * pagesize / (1024 * 1024),
    588   7315  Jonathan 		    MS_PCT_TOTAL(stats.ms_zfs_data));
    589   7315  Jonathan 
    590   7180   jwadams 	mdb_printf("Anon             %16llu  %16llu  %3lu%%\n",
    591      0    stevel 	    stats.ms_anon,
    592      0    stevel 	    (uint64_t)stats.ms_anon * pagesize / (1024 * 1024),
    593      0    stevel 	    MS_PCT_TOTAL(stats.ms_anon));
    594   7180   jwadams 	mdb_printf("Exec and libs    %16llu  %16llu  %3lu%%\n",
    595      0    stevel 	    stats.ms_exec,
    596      0    stevel 	    (uint64_t)stats.ms_exec * pagesize / (1024 * 1024),
    597      0    stevel 	    MS_PCT_TOTAL(stats.ms_exec));
    598   7180   jwadams 	mdb_printf("Page cache       %16llu  %16llu  %3lu%%\n",
    599      0    stevel 	    stats.ms_vnode,
    600      0    stevel 	    (uint64_t)stats.ms_vnode * pagesize / (1024 * 1024),
    601      0    stevel 	    MS_PCT_TOTAL(stats.ms_vnode));
    602   7180   jwadams 	mdb_printf("Free (cachelist) %16llu  %16llu  %3lu%%\n",
    603      0    stevel 	    stats.ms_cachelist,
    604      0    stevel 	    (uint64_t)stats.ms_cachelist * pagesize / (1024 * 1024),
    605      0    stevel 	    MS_PCT_TOTAL(stats.ms_cachelist));
    606   5084   johnlev 
    607   7180   jwadams 	/*
    608   7180   jwadams 	 * occasionally, we double count pages above.  To avoid printing
    609   7180   jwadams 	 * absurdly large values for freemem, we clamp it at zero.
    610   7180   jwadams 	 */
    611   7180   jwadams 	if (physmem > stats.ms_total)
    612   7180   jwadams 		freemem = physmem - stats.ms_total;
    613   7180   jwadams 	else
    614   7180   jwadams 		freemem = 0;
    615   5084   johnlev 
    616   5084   johnlev #if defined(__i386) || defined(__amd64)
    617   5084   johnlev 	/* Are we running under Xen?  If so, get balloon memory usage. */
    618   5084   johnlev 	if ((bln_size = mdb_readvar(&bln_stats, "bln_stats")) != -1) {
    619   7180   jwadams 		if (freemem > bln_stats.bln_hv_pages)
    620   7180   jwadams 			freemem -= bln_stats.bln_hv_pages;
    621   7180   jwadams 		else
    622   7180   jwadams 			freemem = 0;
    623   5084   johnlev 	}
    624   5084   johnlev #endif
    625   5084   johnlev 
    626   7180   jwadams 	mdb_printf("Free (freelist)  %16lu  %16llu  %3lu%%\n", freemem,
    627   5084   johnlev 	    (uint64_t)freemem * pagesize / (1024 * 1024),
    628   5084   johnlev 	    MS_PCT_TOTAL(freemem));
    629   5084   johnlev 
    630   5084   johnlev #if defined(__i386) || defined(__amd64)
    631   5084   johnlev 	if (bln_size != -1) {
    632   7180   jwadams 		mdb_printf("Balloon          %16lu  %16llu  %3lu%%\n",
    633   5084   johnlev 		    bln_stats.bln_hv_pages,
    634   7180   jwadams 		    (uint64_t)bln_stats.bln_hv_pages * pagesize / (1024 * 1024),
    635   5084   johnlev 		    MS_PCT_TOTAL(bln_stats.bln_hv_pages));
    636   5084   johnlev 	}
    637   5084   johnlev #endif
    638   5084   johnlev 
    639      0    stevel 	mdb_printf("\nTotal            %16lu  %16lu\n",
    640      0    stevel 	    physmem,
    641      0    stevel 	    (uint64_t)physmem * pagesize / (1024 * 1024));
    642      0    stevel 
    643      0    stevel 	if (physmem != total_pages) {
    644      0    stevel 		mdb_printf("Physical         %16lu  %16lu\n",
    645      0    stevel 		    total_pages,
    646      0    stevel 		    (uint64_t)total_pages * pagesize / (1024 * 1024));
    647      0    stevel 	}
    648      0    stevel 
    649      0    stevel #undef MS_PCT_TOTAL
    650      0    stevel 
    651      0    stevel 	return (DCMD_OK);
    652      0    stevel }
    653      0    stevel 
    654      0    stevel int
    655      0    stevel page(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
    656      0    stevel {
    657      0    stevel 	page_t	p;
    658      0    stevel 
    659      0    stevel 	if (!(flags & DCMD_ADDRSPEC)) {
    660      0    stevel 		if (mdb_walk_dcmd("page", "page", argc, argv) == -1) {
    661      0    stevel 			mdb_warn("can't walk pages");
    662      0    stevel 			return (DCMD_ERR);
    663      0    stevel 		}
    664      0    stevel 		return (DCMD_OK);
    665      0    stevel 	}
    666      0    stevel 
    667      0    stevel 	if (DCMD_HDRSPEC(flags)) {
    668      0    stevel 		mdb_printf("%<u>%?s %?s %16s %8s %3s %3s %2s %2s %2s%</u>\n",
    669      0    stevel 		    "PAGE", "VNODE", "OFFSET", "SELOCK",
    670      0    stevel 		    "LCT", "COW", "IO", "FS", "ST");
    671      0    stevel 	}
    672      0    stevel 
    673      0    stevel 	if (mdb_vread(&p, sizeof (page_t), addr) == -1) {
    674      0    stevel 		mdb_warn("can't read page_t at %#lx", addr);
    675      0    stevel 		return (DCMD_ERR);
    676      0    stevel 	}
    677      0    stevel 
    678      0    stevel 	mdb_printf("%0?lx %?p %16llx %8x %3d %3d %2x %2x %2x\n",
    679      0    stevel 	    addr, p.p_vnode, p.p_offset, p.p_selock, p.p_lckcnt, p.p_cowcnt,
    680      0    stevel 	    p.p_iolock_state, p.p_fsdata, p.p_state);
    681      0    stevel 
    682      0    stevel 	return (DCMD_OK);
    683      0    stevel }
    684      0    stevel 
    685      0    stevel int
    686      0    stevel swap_walk_init(mdb_walk_state_t *wsp)
    687      0    stevel {
    688      0    stevel 	void	*ptr;
    689      0    stevel 
    690      0    stevel 	if ((mdb_readvar(&ptr, "swapinfo") == -1) || ptr == NULL) {
    691      0    stevel 		mdb_warn("swapinfo not found or invalid");
    692      0    stevel 		return (WALK_ERR);
    693      0    stevel 	}
    694      0    stevel 
    695      0    stevel 	wsp->walk_addr = (uintptr_t)ptr;
    696      0    stevel 
    697      0    stevel 	return (WALK_NEXT);
    698      0    stevel }
    699      0    stevel 
    700      0    stevel int
    701      0    stevel swap_walk_step(mdb_walk_state_t *wsp)
    702      0    stevel {
    703      0    stevel 	uintptr_t	sip;
    704      0    stevel 	struct swapinfo	si;
    705      0    stevel 
    706      0    stevel 	sip = wsp->walk_addr;
    707      0    stevel 
    708      0    stevel 	if (sip == NULL)
    709      0    stevel 		return (WALK_DONE);
    710      0    stevel 
    711      0    stevel 	if (mdb_vread(&si, sizeof (struct swapinfo), sip) == -1) {
    712      0    stevel 		mdb_warn("unable to read swapinfo at %#lx", sip);
    713      0    stevel 		return (WALK_ERR);
    714      0    stevel 	}
    715      0    stevel 
    716      0    stevel 	wsp->walk_addr = (uintptr_t)si.si_next;
    717      0    stevel 
    718      0    stevel 	return (wsp->walk_callback(sip, &si, wsp->walk_cbdata));
    719      0    stevel }
    720      0    stevel 
    721      0    stevel int
    722      0    stevel swapinfof(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
    723      0    stevel {
    724      0    stevel 	struct swapinfo	si;
    725      0    stevel 	char		*name;
    726      0    stevel 
    727      0    stevel 	if (!(flags & DCMD_ADDRSPEC)) {
    728      0    stevel 		if (mdb_walk_dcmd("swapinfo", "swapinfo", argc, argv) == -1) {
    729      0    stevel 			mdb_warn("can't walk swapinfo");
    730      0    stevel 			return (DCMD_ERR);
    731      0    stevel 		}
    732      0    stevel 		return (DCMD_OK);
    733      0    stevel 	}
    734      0    stevel 
    735      0    stevel 	if (DCMD_HDRSPEC(flags)) {
    736      0    stevel 		mdb_printf("%<u>%?s %?s %9s %9s %s%</u>\n",
    737      0    stevel 		    "ADDR", "VNODE", "PAGES", "FREE", "NAME");
    738      0    stevel 	}
    739      0    stevel 
    740      0    stevel 	if (mdb_vread(&si, sizeof (struct swapinfo), addr) == -1) {
    741      0    stevel 		mdb_warn("can't read swapinfo at %#lx", addr);
    742      0    stevel 		return (DCMD_ERR);
    743      0    stevel 	}
    744      0    stevel 
    745      0    stevel 	name = mdb_alloc(si.si_pnamelen, UM_SLEEP | UM_GC);
    746      0    stevel 	if (mdb_vread(name, si.si_pnamelen, (uintptr_t)si.si_pname) == -1)
    747      0    stevel 		name = "*error*";
    748      0    stevel 
    749      0    stevel 	mdb_printf("%0?lx %?p %9d %9d %s\n",
    750      0    stevel 	    addr, si.si_vp, si.si_npgs, si.si_nfpgs, name);
    751      0    stevel 
    752      0    stevel 	return (DCMD_OK);
    753      0    stevel }
    754      0    stevel 
    755      0    stevel int
    756      0    stevel memlist_walk_step(mdb_walk_state_t *wsp)
    757      0    stevel {
    758      0    stevel 	uintptr_t	mlp;
    759      0    stevel 	struct memlist	ml;
    760      0    stevel 
    761      0    stevel 	mlp = wsp->walk_addr;
    762      0    stevel 
    763      0    stevel 	if (mlp == NULL)
    764      0    stevel 		return (WALK_DONE);
    765      0    stevel 
    766      0    stevel 	if (mdb_vread(&ml, sizeof (struct memlist), mlp) == -1) {
    767      0    stevel 		mdb_warn("unable to read memlist at %#lx", mlp);
    768      0    stevel 		return (WALK_ERR);
    769      0    stevel 	}
    770      0    stevel 
    771      0    stevel 	wsp->walk_addr = (uintptr_t)ml.next;
    772      0    stevel 
    773      0    stevel 	return (wsp->walk_callback(mlp, &ml, wsp->walk_cbdata));
    774      0    stevel }
    775      0    stevel 
    776      0    stevel int
    777      0    stevel memlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
    778      0    stevel {
    779      0    stevel 	struct memlist	ml;
    780      0    stevel 
    781      0    stevel 	if (!(flags & DCMD_ADDRSPEC)) {
    782      0    stevel 		uintptr_t ptr;
    783      0    stevel 		uint_t list = 0;
    784      0    stevel 		int i;
    785      0    stevel 		static const char *lists[] = {
    786      0    stevel 			"phys_install",
    787      0    stevel 			"phys_avail",
    788      0    stevel 			"virt_avail"
    789      0    stevel 		};
    790      0    stevel 
    791      0    stevel 		if (mdb_getopts(argc, argv,
    792      0    stevel 		    'i', MDB_OPT_SETBITS, (1 << 0), &list,
    793      0    stevel 		    'a', MDB_OPT_SETBITS, (1 << 1), &list,
    794      0    stevel 		    'v', MDB_OPT_SETBITS, (1 << 2), &list, NULL) != argc)
    795      0    stevel 			return (DCMD_USAGE);
    796      0    stevel 
    797      0    stevel 		if (!list)
    798      0    stevel 			list = 1;
    799      0    stevel 
    800      0    stevel 		for (i = 0; list; i++, list >>= 1) {
    801      0    stevel 			if (!(list & 1))
    802      0    stevel 				continue;
    803      0    stevel 			if ((mdb_readvar(&ptr, lists[i]) == -1) ||
    804      0    stevel 			    (ptr == NULL)) {
    805      0    stevel 				mdb_warn("%s not found or invalid", lists[i]);
    806      0    stevel 				return (DCMD_ERR);
    807      0    stevel 			}
    808      0    stevel 
    809      0    stevel 			mdb_printf("%s:\n", lists[i]);
    810      0    stevel 			if (mdb_pwalk_dcmd("memlist", "memlist", 0, NULL,
    811      0    stevel 			    ptr) == -1) {
    812      0    stevel 				mdb_warn("can't walk memlist");
    813      0    stevel 				return (DCMD_ERR);
    814      0    stevel 			}
    815      0    stevel 		}
    816      0    stevel 		return (DCMD_OK);
    817      0    stevel 	}
    818      0    stevel 
    819      0    stevel 	if (DCMD_HDRSPEC(flags))
    820      0    stevel 		mdb_printf("%<u>%?s %16s %16s%</u>\n", "ADDR", "BASE", "SIZE");
    821      0    stevel 
    822      0    stevel 	if (mdb_vread(&ml, sizeof (struct memlist), addr) == -1) {
    823      0    stevel 		mdb_warn("can't read memlist at %#lx", addr);
    824      0    stevel 		return (DCMD_ERR);
    825      0    stevel 	}
    826      0    stevel 
    827      0    stevel 	mdb_printf("%0?lx %16llx %16llx\n", addr, ml.address, ml.size);
    828      0    stevel 
    829      0    stevel 	return (DCMD_OK);
    830      0    stevel }
    831