Home | History | Annotate | Download | only in syscall
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     28 
     29 /*
     30  * lgroup system calls
     31  */
     32 
     33 #include <sys/types.h>
     34 #include <sys/errno.h>
     35 #include <sys/sunddi.h>
     36 #include <sys/systm.h>
     37 #include <sys/mman.h>
     38 #include <sys/cpupart.h>
     39 #include <sys/lgrp.h>
     40 #include <sys/lgrp_user.h>
     41 #include <sys/promif.h>		/* for prom_printf() */
     42 #include <sys/sysmacros.h>
     43 
     44 #include <vm/as.h>
     45 
     46 
     47 /* definitions for mi_validity */
     48 #define	VALID_ADDR	1
     49 #define	VALID_REQ	2
     50 
     51 /*
     52  * run through the given number of addresses and requests and return the
     53  * corresponding memory information for each address
     54  */
     55 static int
     56 meminfo(int addr_count, struct meminfo *mip)
     57 {
     58 	size_t		in_size, out_size, req_size, val_size;
     59 	struct as	*as;
     60 	struct hat	*hat;
     61 	int		i, j, out_idx, info_count;
     62 	lgrp_t		*lgrp;
     63 	pfn_t		pfn;
     64 	ssize_t		pgsz;
     65 	int		*req_array, *val_array;
     66 	uint64_t	*in_array, *out_array;
     67 	uint64_t	addr, paddr;
     68 	uintptr_t	vaddr;
     69 	int		ret = 0;
     70 	struct meminfo minfo;
     71 #if defined(_SYSCALL32_IMPL)
     72 	struct meminfo32 minfo32;
     73 #endif
     74 
     75 	/*
     76 	 * Make sure that there is at least one address to translate and
     77 	 * limit how many virtual addresses the kernel can do per call
     78 	 */
     79 	if (addr_count < 1)
     80 		return (set_errno(EINVAL));
     81 	else if (addr_count > MAX_MEMINFO_CNT)
     82 		addr_count = MAX_MEMINFO_CNT;
     83 
     84 	if (get_udatamodel() == DATAMODEL_NATIVE) {
     85 		if (copyin(mip, &minfo, sizeof (struct meminfo)))
     86 			return (set_errno(EFAULT));
     87 	}
     88 #if defined(_SYSCALL32_IMPL)
     89 	else {
     90 		bzero(&minfo, sizeof (minfo));
     91 		if (copyin(mip, &minfo32, sizeof (struct meminfo32)))
     92 			return (set_errno(EFAULT));
     93 		minfo.mi_inaddr = (const uint64_t *)(uintptr_t)
     94 		    minfo32.mi_inaddr;
     95 		minfo.mi_info_req = (const uint_t *)(uintptr_t)
     96 		    minfo32.mi_info_req;
     97 		minfo.mi_info_count = minfo32.mi_info_count;
     98 		minfo.mi_outdata = (uint64_t *)(uintptr_t)
     99 		    minfo32.mi_outdata;
    100 		minfo.mi_validity = (uint_t *)(uintptr_t)
    101 		    minfo32.mi_validity;
    102 	}
    103 #endif
    104 	/*
    105 	 * all the input parameters have been copied in:-
    106 	 * addr_count - number of input addresses
    107 	 * minfo.mi_inaddr - array of input addresses
    108 	 * minfo.mi_info_req - array of types of information requested
    109 	 * minfo.mi_info_count - no. of pieces of info requested for each addr
    110 	 * minfo.mi_outdata - array into which the results are placed
    111 	 * minfo.mi_validity -  array containing bitwise result codes; 0th bit
    112 	 *			evaluates validity of corresponding input
    113 	 *			address, 1st bit validity of response to first
    114 	 *			member of info_req, etc.
    115 	 */
    116 
    117 	/* make sure mi_info_count is within limit */
    118 	info_count = minfo.mi_info_count;
    119 	if (info_count < 1 || info_count > MAX_MEMINFO_REQ)
    120 		return (set_errno(EINVAL));
    121 
    122 	/*
    123 	 * allocate buffer in_array for the input addresses and copy them in
    124 	 */
    125 	in_size = sizeof (uint64_t) * addr_count;
    126 	in_array = kmem_alloc(in_size, KM_SLEEP);
    127 	if (copyin(minfo.mi_inaddr, in_array, in_size)) {
    128 		kmem_free(in_array, in_size);
    129 		return (set_errno(EFAULT));
    130 	}
    131 
    132 	/*
    133 	 * allocate buffer req_array for the input info_reqs and copy them in
    134 	 */
    135 	req_size = sizeof (uint_t) * info_count;
    136 	req_array = kmem_alloc(req_size, KM_SLEEP);
    137 	if (copyin(minfo.mi_info_req, req_array, req_size)) {
    138 		kmem_free(req_array, req_size);
    139 		kmem_free(in_array, in_size);
    140 		return (set_errno(EFAULT));
    141 	}
    142 
    143 	/*
    144 	 * allocate buffer out_array which holds the results and will have
    145 	 * to be copied out later
    146 	 */
    147 	out_size = sizeof (uint64_t) * addr_count * info_count;
    148 	out_array = kmem_alloc(out_size, KM_SLEEP);
    149 
    150 	/*
    151 	 * allocate buffer val_array which holds the validity bits and will
    152 	 * have to be copied out later
    153 	 */
    154 	val_size = sizeof (uint_t) * addr_count;
    155 	val_array = kmem_alloc(val_size, KM_SLEEP);
    156 
    157 	if ((req_array[0] & MEMINFO_MASK) == MEMINFO_PLGRP) {
    158 		/* find the corresponding lgroup for each physical address */
    159 		for (i = 0; i < addr_count; i++) {
    160 			paddr = in_array[i];
    161 			pfn = btop(paddr);
    162 			lgrp = lgrp_pfn_to_lgrp(pfn);
    163 			if (lgrp) {
    164 				out_array[i] = lgrp->lgrp_id;
    165 				val_array[i] = VALID_ADDR | VALID_REQ;
    166 			} else {
    167 				out_array[i] = NULL;
    168 				val_array[i] = 0;
    169 			}
    170 		}
    171 	} else {
    172 		/* get the corresponding memory info for each virtual address */
    173 		as = curproc->p_as;
    174 
    175 		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
    176 		hat = as->a_hat;
    177 		for (i = out_idx = 0; i < addr_count; i++, out_idx +=
    178 		    info_count) {
    179 			addr = in_array[i];
    180 			vaddr = (uintptr_t)(addr & ~PAGEOFFSET);
    181 			if (!as_segat(as, (caddr_t)vaddr)) {
    182 				val_array[i] = 0;
    183 				continue;
    184 			}
    185 			val_array[i] = VALID_ADDR;
    186 			pfn = hat_getpfnum(hat, (caddr_t)vaddr);
    187 			if (pfn != PFN_INVALID) {
    188 				paddr = (uint64_t)((pfn << PAGESHIFT) |
    189 					(addr & PAGEOFFSET));
    190 				for (j = 0; j < info_count; j++) {
    191 					switch (req_array[j] & MEMINFO_MASK) {
    192 					case MEMINFO_VPHYSICAL:
    193 						/*
    194 						 * return the physical address
    195 						 * corresponding to the input
    196 						 * virtual address
    197 						 */
    198 						out_array[out_idx + j] = paddr;
    199 						val_array[i] |= VALID_REQ << j;
    200 						break;
    201 					case MEMINFO_VLGRP:
    202 						/*
    203 						 * return the lgroup of physical
    204 						 * page corresponding to the
    205 						 * input virtual address
    206 						 */
    207 						lgrp = lgrp_pfn_to_lgrp(pfn);
    208 						if (lgrp) {
    209 							out_array[out_idx + j] =
    210 								lgrp->lgrp_id;
    211 							val_array[i] |=
    212 								VALID_REQ << j;
    213 						}
    214 						break;
    215 					case MEMINFO_VPAGESIZE:
    216 						/*
    217 						 * return the size of physical
    218 						 * page corresponding to the
    219 						 * input virtual address
    220 						 */
    221 						pgsz = hat_getpagesize(hat,
    222 							(caddr_t)vaddr);
    223 						if (pgsz != -1) {
    224 							out_array[out_idx + j] =
    225 									pgsz;
    226 							val_array[i] |=
    227 								VALID_REQ << j;
    228 						}
    229 						break;
    230 					case MEMINFO_VREPLCNT:
    231 						/*
    232 						 * for future use:-
    233 						 * return the no. replicated
    234 						 * physical pages corresponding
    235 						 * to the input virtual address,
    236 						 * so it is always 0 at the
    237 						 * moment
    238 						 */
    239 						out_array[out_idx + j] = 0;
    240 						val_array[i] |= VALID_REQ << j;
    241 						break;
    242 					case MEMINFO_VREPL:
    243 						/*
    244 						 * for future use:-
    245 						 * return the nth physical
    246 						 * replica of the specified
    247 						 * virtual address
    248 						 */
    249 						break;
    250 					case MEMINFO_VREPL_LGRP:
    251 						/*
    252 						 * for future use:-
    253 						 * return the lgroup of nth
    254 						 * physical replica of the
    255 						 * specified virtual address
    256 						 */
    257 						break;
    258 					case MEMINFO_PLGRP:
    259 						/*
    260 						 * this is for physical address
    261 						 * only, shouldn't mix with
    262 						 * virtual address
    263 						 */
    264 						break;
    265 					default:
    266 						break;
    267 					}
    268 				}
    269 			}
    270 		}
    271 		AS_LOCK_EXIT(as, &as->a_lock);
    272 	}
    273 
    274 	/* copy out the results and validity bits and free the buffers */
    275 	if ((copyout(out_array, minfo.mi_outdata, out_size) != 0) ||
    276 		(copyout(val_array, minfo.mi_validity, val_size) != 0))
    277 		ret = set_errno(EFAULT);
    278 
    279 	kmem_free(in_array, in_size);
    280 	kmem_free(out_array, out_size);
    281 	kmem_free(req_array, req_size);
    282 	kmem_free(val_array, val_size);
    283 
    284 	return (ret);
    285 }
    286 
    287 
    288 /*
    289  * Initialize lgroup affinities for thread
    290  */
    291 void
    292 lgrp_affinity_init(lgrp_affinity_t **bufaddr)
    293 {
    294 	if (bufaddr)
    295 		*bufaddr = NULL;
    296 }
    297 
    298 
    299 /*
    300  * Free lgroup affinities for thread and set to NULL
    301  * just in case thread gets recycled
    302  */
    303 void
    304 lgrp_affinity_free(lgrp_affinity_t **bufaddr)
    305 {
    306 	if (bufaddr && *bufaddr) {
    307 		kmem_free(*bufaddr, nlgrpsmax * sizeof (lgrp_affinity_t));
    308 		*bufaddr = NULL;
    309 	}
    310 }
    311 
    312 
    313 #define	P_ANY	-2	/* cookie specifying any ID */
    314 
    315 
    316 /*
    317  * Find LWP with given ID in specified process and get its affinity for
    318  * specified lgroup
    319  */
    320 lgrp_affinity_t
    321 lgrp_affinity_get_thread(proc_t *p, id_t lwpid, lgrp_id_t lgrp)
    322 {
    323 	lgrp_affinity_t aff;
    324 	int		found;
    325 	kthread_t	*t;
    326 
    327 	ASSERT(MUTEX_HELD(&p->p_lock));
    328 
    329 	aff = LGRP_AFF_NONE;
    330 	found = 0;
    331 	t = p->p_tlist;
    332 	/*
    333 	 * The process may be executing in proc_exit() and its p->p_list may be
    334 	 * already NULL.
    335 	 */
    336 	if (t == NULL)
    337 		return (set_errno(ESRCH));
    338 
    339 	do {
    340 		if (t->t_tid == lwpid || lwpid == P_ANY) {
    341 			thread_lock(t);
    342 			/*
    343 			 * Check to see whether caller has permission to set
    344 			 * affinity for LWP
    345 			 */
    346 			if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
    347 				thread_unlock(t);
    348 				return (set_errno(EPERM));
    349 			}
    350 
    351 			if (t->t_lgrp_affinity)
    352 				aff = t->t_lgrp_affinity[lgrp];
    353 			thread_unlock(t);
    354 			found = 1;
    355 			break;
    356 		}
    357 	} while ((t = t->t_forw) != p->p_tlist);
    358 	if (!found)
    359 		aff = set_errno(ESRCH);
    360 
    361 	return (aff);
    362 }
    363 
    364 
    365 /*
    366  * Get lgroup affinity for given LWP
    367  */
    368 lgrp_affinity_t
    369 lgrp_affinity_get(lgrp_affinity_args_t *ap)
    370 {
    371 	lgrp_affinity_t		aff;
    372 	lgrp_affinity_args_t	args;
    373 	id_t			id;
    374 	idtype_t		idtype;
    375 	lgrp_id_t		lgrp;
    376 	proc_t			*p;
    377 	kthread_t		*t;
    378 
    379 	/*
    380 	 * Copyin arguments
    381 	 */
    382 	if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
    383 		return (set_errno(EFAULT));
    384 
    385 	id = args.id;
    386 	idtype = args.idtype;
    387 	lgrp = args.lgrp;
    388 
    389 	/*
    390 	 * Check for invalid lgroup
    391 	 */
    392 	if (lgrp < 0 || lgrp == LGRP_NONE)
    393 		return (set_errno(EINVAL));
    394 
    395 	/*
    396 	 * Check for existing lgroup
    397 	 */
    398 	if (lgrp > lgrp_alloc_max)
    399 		return (set_errno(ESRCH));
    400 
    401 	/*
    402 	 * Get lgroup affinity for given LWP or process
    403 	 */
    404 	switch (idtype) {
    405 
    406 	case P_LWPID:
    407 		/*
    408 		 * LWP in current process
    409 		 */
    410 		p = curproc;
    411 		mutex_enter(&p->p_lock);
    412 		if (id != P_MYID)	/* different thread */
    413 			aff = lgrp_affinity_get_thread(p, id, lgrp);
    414 		else {			/* current thread */
    415 			aff = LGRP_AFF_NONE;
    416 			t = curthread;
    417 			thread_lock(t);
    418 			if (t->t_lgrp_affinity)
    419 				aff = t->t_lgrp_affinity[lgrp];
    420 			thread_unlock(t);
    421 		}
    422 		mutex_exit(&p->p_lock);
    423 		break;
    424 
    425 	case P_PID:
    426 		/*
    427 		 * Process
    428 		 */
    429 		mutex_enter(&pidlock);
    430 
    431 		if (id == P_MYID)
    432 			p = curproc;
    433 		else {
    434 			p = prfind(id);
    435 			if (p == NULL) {
    436 				mutex_exit(&pidlock);
    437 				return (set_errno(ESRCH));
    438 			}
    439 		}
    440 
    441 		mutex_enter(&p->p_lock);
    442 		aff = lgrp_affinity_get_thread(p, P_ANY, lgrp);
    443 		mutex_exit(&p->p_lock);
    444 
    445 		mutex_exit(&pidlock);
    446 		break;
    447 
    448 	default:
    449 		aff = set_errno(EINVAL);
    450 		break;
    451 	}
    452 
    453 	return (aff);
    454 }
    455 
    456 
/*
 * Find lgroup for which this thread has most affinity in specified partition
 * starting from home lgroup unless specified starting lgroup is preferred
 *
 * t		thread whose affinities are consulted
 * cpupart	partition (pset) whose lgroup loads are searched
 * start	lgroup ID to start from when it is preferred or when the
 *		home lgroup has no CPUs in cpupart
 * prefer_start	when true, try "start" before the thread's home lgroup
 *
 * Returns the lpl with the highest affinity, or NULL when the thread has no
 * affinity array or (for a thread not bound to a CPU) when the best affinity
 * found is LGRP_AFF_NONE.  Affinities are compared with a strict ">", so on
 * a tie the candidate encountered first is kept.
 */
lpl_t *
lgrp_affinity_best(kthread_t *t, struct cpupart *cpupart, lgrp_id_t start,
    boolean_t prefer_start)
{
	lgrp_affinity_t	*affs;
	lgrp_affinity_t	best_aff;
	lpl_t		*best_lpl;
	lgrp_id_t	finish;
	lgrp_id_t	home;
	lgrp_id_t	lgrpid;
	lpl_t		*lpl;

	ASSERT(t != NULL);
	ASSERT((MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0) ||
	    (MUTEX_HELD(&ttoproc(t)->p_lock) && THREAD_LOCK_HELD(t)));
	ASSERT(cpupart != NULL);

	/* no affinity array means no affinities have been set */
	if (t->t_lgrp_affinity == NULL)
		return (NULL);

	affs = t->t_lgrp_affinity;

	/*
	 * Thread bound to CPU
	 */
	if (t->t_bind_cpu != PBIND_NONE) {
		cpu_t	*cp;

		/*
		 * Find which lpl has most affinity among leaf lpl directly
		 * containing CPU and its ancestor lpls
		 */
		cp = cpu[t->t_bind_cpu];

		best_lpl = lpl = cp->cpu_lpl;
		best_aff = affs[best_lpl->lpl_lgrpid];
		while (lpl->lpl_parent != NULL) {
			lpl = lpl->lpl_parent;
			lgrpid = lpl->lpl_lgrpid;
			if (affs[lgrpid] > best_aff) {
				best_lpl = lpl;
				best_aff = affs[lgrpid];
			}
		}
		/*
		 * Note: unlike the unbound case below, the result is
		 * returned here without checking for LGRP_AFF_NONE
		 */
		return (best_lpl);
	}

	/*
	 * Start searching from home lgroup unless given starting lgroup is
	 * preferred or home lgroup isn't in given pset.  Use root lgroup as
	 * starting point if both home and starting lgroups aren't in given
	 * pset.
	 */
	ASSERT(start >= 0 && start <= lgrp_alloc_max);
	home = t->t_lpl->lpl_lgrpid;
	if (!prefer_start && LGRP_CPUS_IN_PART(home, cpupart))
		lgrpid = home;
	else if (start != LGRP_NONE && LGRP_CPUS_IN_PART(start, cpupart))
		lgrpid = start;
	else
		lgrpid = LGRP_ROOTID;

	/*
	 * Seed the search with the starting lgroup, then visit every lgroup
	 * ID once, wrapping from lgrp_alloc_max back to 0, until the
	 * starting ID comes around again
	 */
	best_lpl = &cpupart->cp_lgrploads[lgrpid];
	best_aff = affs[lgrpid];
	finish = lgrpid;
	do {
		/*
		 * Skip any lgroups that don't have CPU resources
		 * in this processor set.
		 */
		if (!LGRP_CPUS_IN_PART(lgrpid, cpupart)) {
			if (++lgrpid > lgrp_alloc_max)
				lgrpid = 0;	/* wrap the search */
			/* continue still evaluates the loop condition */
			continue;
		}

		/*
		 * Find lgroup with most affinity
		 */
		lpl = &cpupart->cp_lgrploads[lgrpid];
		if (affs[lgrpid] > best_aff) {
			best_aff = affs[lgrpid];
			best_lpl = lpl;
		}

		if (++lgrpid > lgrp_alloc_max)
			lgrpid = 0;	/* wrap the search */

	} while (lgrpid != finish);

	/*
	 * No lgroup (in this pset) with any affinity
	 */
	if (best_aff == LGRP_AFF_NONE)
		return (NULL);

	lgrpid = best_lpl->lpl_lgrpid;
	ASSERT(LGRP_CPUS_IN_PART(lgrpid, cpupart) && best_lpl->lpl_ncpu > 0);

	return (best_lpl);
}
    562 
    563 
    564 /*
    565  * Set thread's affinity for given lgroup
    566  */
    567 int
    568 lgrp_affinity_set_thread(kthread_t *t, lgrp_id_t lgrp, lgrp_affinity_t aff,
    569     lgrp_affinity_t **aff_buf)
    570 {
    571 	lgrp_affinity_t	*affs;
    572 	lgrp_id_t	best;
    573 	lpl_t		*best_lpl;
    574 	lgrp_id_t	home;
    575 	int		retval;
    576 
    577 	ASSERT(t != NULL);
    578 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
    579 
    580 	retval = 0;
    581 
    582 	thread_lock(t);
    583 
    584 	/*
    585 	 * Check to see whether caller has permission to set affinity for
    586 	 * thread
    587 	 */
    588 	if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
    589 		thread_unlock(t);
    590 		return (set_errno(EPERM));
    591 	}
    592 
    593 	if (t->t_lgrp_affinity == NULL) {
    594 		if (aff == LGRP_AFF_NONE) {
    595 			thread_unlock(t);
    596 			return (0);
    597 		}
    598 		ASSERT(aff_buf != NULL && *aff_buf != NULL);
    599 		t->t_lgrp_affinity = *aff_buf;
    600 		*aff_buf = NULL;
    601 	}
    602 
    603 	affs = t->t_lgrp_affinity;
    604 	affs[lgrp] = aff;
    605 
    606 	/*
    607 	 * Find lgroup for which thread has most affinity,
    608 	 * starting with lgroup for which affinity being set
    609 	 */
    610 	best_lpl = lgrp_affinity_best(t, t->t_cpupart, lgrp, B_TRUE);
    611 
    612 	/*
    613 	 * Rehome if found lgroup with more affinity than home or lgroup for
    614 	 * which affinity is being set has same affinity as home
    615 	 */
    616 	home = t->t_lpl->lpl_lgrpid;
    617 	if (best_lpl != NULL && best_lpl != t->t_lpl) {
    618 		best = best_lpl->lpl_lgrpid;
    619 		if (affs[best] > affs[home] || (affs[best] == affs[home] &&
    620 		    best == lgrp))
    621 			lgrp_move_thread(t, best_lpl, 1);
    622 	}
    623 
    624 	thread_unlock(t);
    625 
    626 	return (retval);
    627 }
    628 
    629 
    630 /*
    631  * Set process' affinity for specified lgroup
    632  */
    633 int
    634 lgrp_affinity_set_proc(proc_t *p, lgrp_id_t lgrp, lgrp_affinity_t aff,
    635     lgrp_affinity_t **aff_buf_array)
    636 {
    637 	lgrp_affinity_t	*buf;
    638 	int		err = 0;
    639 	int		i;
    640 	int		retval;
    641 	kthread_t	*t;
    642 
    643 	ASSERT(MUTEX_HELD(&pidlock) && MUTEX_HELD(&p->p_lock));
    644 	ASSERT(aff_buf_array != NULL);
    645 
    646 	i = 0;
    647 	t = p->p_tlist;
    648 	if (t != NULL) {
    649 		do {
    650 			/*
    651 			 * Set lgroup affinity for thread
    652 			 */
    653 			buf = aff_buf_array[i];
    654 			retval = lgrp_affinity_set_thread(t, lgrp, aff, &buf);
    655 
    656 			if (err == 0 && retval != 0)
    657 				err = retval;
    658 
    659 			/*
    660 			 * Advance pointer to next buffer
    661 			 */
    662 			if (buf == NULL) {
    663 				ASSERT(i < p->p_lwpcnt);
    664 				aff_buf_array[i] = NULL;
    665 				i++;
    666 			}
    667 
    668 		} while ((t = t->t_forw) != p->p_tlist);
    669 	}
    670 	return (err);
    671 }
    672 
    673 
    674 /*
    675  * Set LWP's or process' affinity for specified lgroup
    676  *
    677  * When setting affinities, pidlock, process p_lock, and thread_lock()
    678  * need to be held in that order to protect target thread's pset, process,
    679  * process contents, and thread contents.  thread_lock() does splhigh(),
    680  * so it ends up having similiar effect as kpreempt_disable(), so it will
    681  * protect calls to lgrp_move_thread() and lgrp_choose() from pset changes.
    682  */
    683 int
    684 lgrp_affinity_set(lgrp_affinity_args_t *ap)
    685 {
    686 	lgrp_affinity_t		aff;
    687 	lgrp_affinity_t		*aff_buf;
    688 	lgrp_affinity_args_t	args;
    689 	id_t			id;
    690 	idtype_t		idtype;
    691 	lgrp_id_t		lgrp;
    692 	int			nthreads;
    693 	proc_t			*p;
    694 	int			retval;
    695 
    696 	/*
    697 	 * Copyin arguments
    698 	 */
    699 	if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
    700 		return (set_errno(EFAULT));
    701 
    702 	idtype = args.idtype;
    703 	id = args.id;
    704 	lgrp = args.lgrp;
    705 	aff = args.aff;
    706 
    707 	/*
    708 	 * Check for invalid lgroup
    709 	 */
    710 	if (lgrp < 0 || lgrp == LGRP_NONE)
    711 		return (set_errno(EINVAL));
    712 
    713 	/*
    714 	 * Check for existing lgroup
    715 	 */
    716 	if (lgrp > lgrp_alloc_max)
    717 		return (set_errno(ESRCH));
    718 
    719 	/*
    720 	 * Check for legal affinity
    721 	 */
    722 	if (aff != LGRP_AFF_NONE && aff != LGRP_AFF_WEAK &&
    723 	    aff != LGRP_AFF_STRONG)
    724 		return (set_errno(EINVAL));
    725 
    726 	/*
    727 	 * Must be process or LWP ID
    728 	 */
    729 	if (idtype != P_LWPID && idtype != P_PID)
    730 		return (set_errno(EINVAL));
    731 
    732 	/*
    733 	 * Set given LWP's or process' affinity for specified lgroup
    734 	 */
    735 	switch (idtype) {
    736 
    737 	case P_LWPID:
    738 		/*
    739 		 * Allocate memory for thread's lgroup affinities
    740 		 * ahead of time w/o holding locks
    741 		 */
    742 		aff_buf = kmem_zalloc(nlgrpsmax * sizeof (lgrp_affinity_t),
    743 		    KM_SLEEP);
    744 
    745 		p = curproc;
    746 
    747 		/*
    748 		 * Set affinity for thread
    749 		 */
    750 		mutex_enter(&p->p_lock);
    751 		if (id == P_MYID) {		/* current thread */
    752 			retval = lgrp_affinity_set_thread(curthread, lgrp, aff,
    753 			    &aff_buf);
    754 		} else if (p->p_tlist == NULL) {
    755 			retval = set_errno(ESRCH);
    756 		} else {			/* other thread */
    757 			int		found = 0;
    758 			kthread_t	*t;
    759 
    760 			t = p->p_tlist;
    761 			do {
    762 				if (t->t_tid == id) {
    763 					retval = lgrp_affinity_set_thread(t,
    764 					    lgrp, aff, &aff_buf);
    765 					found = 1;
    766 					break;
    767 				}
    768 			} while ((t = t->t_forw) != p->p_tlist);
    769 			if (!found)
    770 				retval = set_errno(ESRCH);
    771 		}
    772 		mutex_exit(&p->p_lock);
    773 
    774 		/*
    775 		 * Free memory for lgroup affinities,
    776 		 * since thread didn't need it
    777 		 */
    778 		if (aff_buf)
    779 			kmem_free(aff_buf,
    780 			    nlgrpsmax * sizeof (lgrp_affinity_t));
    781 
    782 		break;
    783 
    784 	case P_PID:
    785 
    786 		do {
    787 			lgrp_affinity_t	**aff_buf_array;
    788 			int		i;
    789 			size_t		size;
    790 
    791 			/*
    792 			 * Get process
    793 			 */
    794 			mutex_enter(&pidlock);
    795 
    796 			if (id == P_MYID)
    797 				p = curproc;
    798 			else
    799 				p = prfind(id);
    800 
    801 			if (p == NULL) {
    802 				mutex_exit(&pidlock);
    803 				return (set_errno(ESRCH));
    804 			}
    805 
    806 			/*
    807 			 * Get number of threads in process
    808 			 *
    809 			 * NOTE: Only care about user processes,
    810 			 *	 so p_lwpcnt should be number of threads.
    811 			 */
    812 			mutex_enter(&p->p_lock);
    813 			nthreads = p->p_lwpcnt;
    814 			mutex_exit(&p->p_lock);
    815 
    816 			mutex_exit(&pidlock);
    817 
    818 			if (nthreads < 1)
    819 				return (set_errno(ESRCH));
    820 
    821 			/*
    822 			 * Preallocate memory for lgroup affinities for
    823 			 * each thread in process now to avoid holding
    824 			 * any locks.  Allocate an array to hold a buffer
    825 			 * for each thread.
    826 			 */
    827 			aff_buf_array = kmem_zalloc(nthreads *
    828 			    sizeof (lgrp_affinity_t *), KM_SLEEP);
    829 
    830 			size = nlgrpsmax * sizeof (lgrp_affinity_t);
    831 			for (i = 0; i < nthreads; i++)
    832 				aff_buf_array[i] = kmem_zalloc(size, KM_SLEEP);
    833 
    834 			mutex_enter(&pidlock);
    835 
    836 			/*
    837 			 * Get process again since dropped locks to allocate
    838 			 * memory (except current process)
    839 			 */
    840 			if (id != P_MYID)
    841 				p = prfind(id);
    842 
    843 			/*
    844 			 * Process went away after we dropped locks and before
    845 			 * reacquiring them, so drop locks, free memory, and
    846 			 * return.
    847 			 */
    848 			if (p == NULL) {
    849 				mutex_exit(&pidlock);
    850 				for (i = 0; i < nthreads; i++)
    851 					kmem_free(aff_buf_array[i], size);
    852 				kmem_free(aff_buf_array,
    853 				    nthreads * sizeof (lgrp_affinity_t *));
    854 				return (set_errno(ESRCH));
    855 			}
    856 
    857 			mutex_enter(&p->p_lock);
    858 
    859 			/*
    860 			 * See whether number of threads is same
    861 			 * If not, drop locks, free memory, and try again
    862 			 */
    863 			if (nthreads != p->p_lwpcnt) {
    864 				mutex_exit(&p->p_lock);
    865 				mutex_exit(&pidlock);
    866 				for (i = 0; i < nthreads; i++)
    867 					kmem_free(aff_buf_array[i], size);
    868 				kmem_free(aff_buf_array,
    869 				    nthreads * sizeof (lgrp_affinity_t *));
    870 				continue;
    871 			}
    872 
    873 			/*
    874 			 * Set lgroup affinity for threads in process
    875 			 */
    876 			retval = lgrp_affinity_set_proc(p, lgrp, aff,
    877 			    aff_buf_array);
    878 
    879 			mutex_exit(&p->p_lock);
    880 			mutex_exit(&pidlock);
    881 
    882 			/*
    883 			 * Free any leftover memory, since some threads may
    884 			 * have already allocated memory and set lgroup
    885 			 * affinities before
    886 			 */
    887 			for (i = 0; i < nthreads; i++)
    888 				if (aff_buf_array[i] != NULL)
    889 					kmem_free(aff_buf_array[i], size);
    890 			kmem_free(aff_buf_array,
    891 			    nthreads * sizeof (lgrp_affinity_t *));
    892 
    893 			break;
    894 
    895 		} while (nthreads != p->p_lwpcnt);
    896 
    897 		break;
    898 
    899 	default:
    900 		retval = set_errno(EINVAL);
    901 		break;
    902 	}
    903 
    904 	return (retval);
    905 }
    906 
    907 
    908 /*
    909  * Return the latest generation number for the lgroup hierarchy
    910  * with the given view
    911  */
    912 lgrp_gen_t
    913 lgrp_generation(lgrp_view_t view)
    914 {
    915 	cpupart_t	*cpupart;
    916 	uint_t		gen;
    917 
    918 	kpreempt_disable();
    919 
    920 	/*
    921 	 * Determine generation number for given view
    922 	 */
    923 	if (view == LGRP_VIEW_OS)
    924 		/*
    925 		 * Return generation number of lgroup hierarchy for OS view
    926 		 */
    927 		gen = lgrp_gen;
    928 	else {
    929 		/*
    930 		 * For caller's view, use generation numbers for lgroup
    931 		 * hierarchy and caller's pset
    932 		 * NOTE: Caller needs to check for change in pset ID
    933 		 */
    934 		cpupart = curthread->t_cpupart;
    935 		ASSERT(cpupart);
    936 		gen = lgrp_gen + cpupart->cp_gen;
    937 	}
    938 
    939 	kpreempt_enable();
    940 
    941 	return (gen);
    942 }
    943 
    944 
    945 lgrp_id_t
    946 lgrp_home_thread(kthread_t *t)
    947 {
    948 	lgrp_id_t	home;
    949 
    950 	ASSERT(t != NULL);
    951 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
    952 
    953 	thread_lock(t);
    954 
    955 	/*
    956 	 * Check to see whether caller has permission to set affinity for
    957 	 * thread
    958 	 */
    959 	if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
    960 		thread_unlock(t);
    961 		return (set_errno(EPERM));
    962 	}
    963 
    964 	home = lgrp_home_id(t);
    965 
    966 	thread_unlock(t);
    967 	return (home);
    968 }
    969 
    970 
    971 /*
    972  * Get home lgroup of given process or thread
    973  */
    974 lgrp_id_t
    975 lgrp_home_get(idtype_t idtype, id_t id)
    976 {
    977 	proc_t		*p;
    978 	lgrp_id_t	retval;
    979 	kthread_t	*t;
    980 
    981 	/*
    982 	 * Get home lgroup of given LWP or process
    983 	 */
    984 	switch (idtype) {
    985 
    986 	case P_LWPID:
    987 		p = curproc;
    988 
    989 		/*
    990 		 * Set affinity for thread
    991 		 */
    992 		mutex_enter(&p->p_lock);
    993 		if (id == P_MYID) {		/* current thread */
    994 			retval = lgrp_home_thread(curthread);
    995 		} else if (p->p_tlist == NULL) {
    996 			retval = set_errno(ESRCH);
    997 		} else {			/* other thread */
    998 			int	found = 0;
    999 
   1000 			t = p->p_tlist;
   1001 			do {
   1002 				if (t->t_tid == id) {
   1003 					retval = lgrp_home_thread(t);
   1004 					found = 1;
   1005 					break;
   1006 				}
   1007 			} while ((t = t->t_forw) != p->p_tlist);
   1008 			if (!found)
   1009 				retval = set_errno(ESRCH);
   1010 		}
   1011 		mutex_exit(&p->p_lock);
   1012 		break;
   1013 
   1014 	case P_PID:
   1015 		/*
   1016 		 * Get process
   1017 		 */
   1018 		mutex_enter(&pidlock);
   1019 
   1020 		if (id == P_MYID)
   1021 			p = curproc;
   1022 		else
   1023 			p = prfind(id);
   1024 
   1025 		if (p == NULL) {
   1026 			mutex_exit(&pidlock);
   1027 			return (set_errno(ESRCH));
   1028 		}
   1029 
   1030 		mutex_enter(&p->p_lock);
   1031 		t = p->p_tlist;
   1032 		if (t == NULL)
   1033 			retval = set_errno(ESRCH);
   1034 		else
   1035 			retval = lgrp_home_thread(t);
   1036 		mutex_exit(&p->p_lock);
   1037 
   1038 		mutex_exit(&pidlock);
   1039 
   1040 		break;
   1041 
   1042 	default:
   1043 		retval = set_errno(EINVAL);
   1044 		break;
   1045 	}
   1046 
   1047 	return (retval);
   1048 }
   1049 
   1050 
   1051 /*
   1052  * Return latency between "from" and "to" lgroups
   1053  *
   1054  * This latency number can only be used for relative comparison
   1055  * between lgroups on the running system, cannot be used across platforms,
   1056  * and may not reflect the actual latency.  It is platform and implementation
   1057  * specific, so platform gets to decide its value.  It would be nice if the
   1058  * number was at least proportional to make comparisons more meaningful though.
   1059  */
   1060 int
   1061 lgrp_latency(lgrp_id_t from, lgrp_id_t to)
   1062 {
   1063 	lgrp_t		*from_lgrp;
   1064 	int		i;
   1065 	int		latency;
   1066 	int		latency_max;
   1067 	lgrp_t		*to_lgrp;
   1068 
   1069 	ASSERT(MUTEX_HELD(&cpu_lock));
   1070 
   1071 	if (from < 0 || to < 0)
   1072 		return (set_errno(EINVAL));
   1073 
   1074 	if (from > lgrp_alloc_max || to > lgrp_alloc_max)
   1075 		return (set_errno(ESRCH));
   1076 
   1077 	from_lgrp = lgrp_table[from];
   1078 	to_lgrp = lgrp_table[to];
   1079 
   1080 	if (!LGRP_EXISTS(from_lgrp) || !LGRP_EXISTS(to_lgrp)) {
   1081 		return (set_errno(ESRCH));
   1082 	}
   1083 
   1084 	/*
   1085 	 * Get latency for same lgroup
   1086 	 */
   1087 	if (from == to) {
   1088 		latency = from_lgrp->lgrp_latency;
   1089 		return (latency);
   1090 	}
   1091 
   1092 	/*
   1093 	 * Get latency between leaf lgroups
   1094 	 */
   1095 	if (from_lgrp->lgrp_childcnt == 0 && to_lgrp->lgrp_childcnt == 0)
   1096 		return (lgrp_plat_latency(from_lgrp->lgrp_plathand,
   1097 		    to_lgrp->lgrp_plathand));
   1098 
   1099 	/*
   1100 	 * Determine max latency between resources in two lgroups
   1101 	 */
   1102 	latency_max = 0;
   1103 	for (i = 0; i <= lgrp_alloc_max; i++) {
   1104 		lgrp_t	*from_rsrc;
   1105 		int	j;
   1106 		lgrp_t	*to_rsrc;
   1107 
   1108 		from_rsrc = lgrp_table[i];
   1109 		if (!LGRP_EXISTS(from_rsrc) ||
   1110 		    !klgrpset_ismember(from_lgrp->lgrp_set[LGRP_RSRC_CPU], i))
   1111 			continue;
   1112 
   1113 		for (j = 0; j <= lgrp_alloc_max; j++) {
   1114 			to_rsrc = lgrp_table[j];
   1115 			if (!LGRP_EXISTS(to_rsrc) ||
   1116 			    klgrpset_ismember(to_lgrp->lgrp_set[LGRP_RSRC_MEM],
   1117 			    j) == 0)
   1118 				continue;
   1119 			latency = lgrp_plat_latency(from_rsrc->lgrp_plathand,
   1120 			    to_rsrc->lgrp_plathand);
   1121 			if (latency > latency_max)
   1122 				latency_max = latency;
   1123 		}
   1124 	}
   1125 	return (latency_max);
   1126 }
   1127 
   1128 
   1129 /*
   1130  * Return lgroup interface version number
   1131  * 0 - none
   1132  * 1 - original
   1133  * 2 - lgrp_latency_cookie() and lgrp_resources() added
   1134  */
   1135 int
   1136 lgrp_version(int version)
   1137 {
   1138 	/*
   1139 	 * Return LGRP_VER_NONE when requested version isn't supported
   1140 	 */
   1141 	if (version < LGRP_VER_NONE || version > LGRP_VER_CURRENT)
   1142 		return (LGRP_VER_NONE);
   1143 
   1144 	/*
   1145 	 * Return current version when LGRP_VER_NONE passed in
   1146 	 */
   1147 	if (version == LGRP_VER_NONE)
   1148 		return (LGRP_VER_CURRENT);
   1149 
   1150 	/*
   1151 	 * Otherwise, return supported version.
   1152 	 */
   1153 	return (version);
   1154 }
   1155 
   1156 
/*
 * Snapshot of lgroup hierarchy
 *
 * One snapshot is kept and is based on the kernel's native data model, so
 * a 32-bit snapshot is kept for the 32-bit kernel and a 64-bit one for the
 * 64-bit kernel.  If a 32-bit user wants a snapshot from the 64-bit kernel,
 * the kernel generates a 32-bit snapshot from the data in its 64-bit snapshot.
 *
 * The format is defined by lgroup snapshot header and the layout of
 * the snapshot in memory is as follows:
 * 1) lgroup snapshot header
 *    - specifies format of snapshot
 *    - defined by lgrp_snapshot_header_t
 * 2) lgroup info array
 *    - contains information about each lgroup
 *    - one element for each lgroup
 *    - each element is defined by lgrp_info_t
 * 3) lgroup CPU ID array
 *    - contains list (array) of CPU IDs for each lgroup
 *    - lgrp_info_t points into array and specifies how many CPUs belong to
 *      given lgroup
 * 4) lgroup parents array
 *    - contains lgroup bitmask of parents for each lgroup
 *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
 * 5) lgroup children array
 *    - contains lgroup bitmask of children for each lgroup
 *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
 * 6) lgroup resources array
 *    - contains lgroup bitmask of resources for each lgroup
 *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
 * 7) lgroup latency table
 *    - contains latency from each lgroup to each of other lgroups
 *
 * NOTE:  Must use nlgrpsmax for per lgroup data structures because lgroups
 *	  may be sparsely allocated.
 *
 * The cached snapshot is considered current while its ss_gen matches
 * lgrp_gen; lgrp_snapshot() frees and rebuilds it otherwise.
 */
lgrp_snapshot_header_t	*lgrp_snap = NULL;	/* lgroup snapshot */
static kmutex_t		lgrp_snap_lock;		/* snapshot lock */
   1195 
   1196 
/*
 * Take a snapshot of lgroup hierarchy and return size of buffer
 * needed to hold snapshot
 *
 * The snapshot is cached in lgrp_snap and reused for as long as its
 * generation number (ss_gen) still matches lgrp_gen; otherwise the old
 * snapshot is freed and a new one is built.
 *
 * Returns the size in bytes of the buffer the caller needs: the native
 * snapshot size (ss_size) or, under _SYSCALL32_IMPL when the caller's data
 * model is DATAMODEL_ILP32, the size calculated for the 32-bit layout.
 * If memory for a new snapshot cannot be allocated, returns the result of
 * set_errno(ENOMEM) and leaves lgrp_snap NULL.
 *
 * NOTE(review): nothing in this function takes lgrp_snap_lock; presumably
 * the caller holds it to serialize access to lgrp_snap -- confirm against
 * the caller.
 */
static int
lgrp_snapshot(void)
{
	size_t		bitmask_size;
	size_t		bitmasks_size;
	size_t		bufsize;
	int		cpu_index;
	size_t		cpuids_size;
	int		i;
	int		j;
	size_t		info_size;
	size_t		lats_size;
	ulong_t		*lgrp_children;
	processorid_t	*lgrp_cpuids;
	lgrp_info_t	*lgrp_info;
	int		**lgrp_lats;
	ulong_t		*lgrp_parents;
	ulong_t		*lgrp_rsets;
	ulong_t		*lgrpset;
	int		snap_ncpus;
	int		snap_nlgrps;
	int		snap_nlgrpsmax;
	size_t		snap_hdr_size;
#ifdef	_SYSCALL32_IMPL
	model_t		model = DATAMODEL_NATIVE;

	/*
	 * Have up-to-date snapshot, so check to see whether caller is 32-bit
	 * program and need to return size of 32-bit snapshot now.
	 */
	model = get_udatamodel();
	if (model == DATAMODEL_ILP32 && lgrp_snap &&
	    lgrp_snap->ss_gen == lgrp_gen) {

		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

		/*
		 * Calculate size of buffer needed for 32-bit snapshot,
		 * rounding up size of each object to allow for alignment
		 * of next object in buffer.
		 */
		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
		    sizeof (caddr32_t));
		info_size =
		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
		    sizeof (processorid_t));
		cpuids_size =
		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
		    sizeof (ulong_t));

		/*
		 * lgroup bitmasks needed for parents, children, and resources
		 * for each lgroup and pset lgroup set
		 * (2 + LGRP_RSRC_COUNT bitmasks per lgroup, plus one bitmask
		 * for the pset lgroup set)
		 */
		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
		    snap_nlgrpsmax) + 1) * bitmask_size;

		/*
		 * Size of latency table and buffer
		 * (one row pointer per lgroup followed by an
		 * nlgrpsmax x nlgrpsmax array of ints)
		 */
		lats_size = snap_nlgrpsmax * sizeof (caddr32_t) +
		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);

		bufsize = snap_hdr_size + info_size + cpuids_size +
		    bitmasks_size + lats_size;
		return (bufsize);
	}
#endif	/* _SYSCALL32_IMPL */

	/*
	 * Check whether snapshot is up-to-date
	 * Free it and take another one if not
	 */
	if (lgrp_snap) {
		if (lgrp_snap->ss_gen == lgrp_gen)
			return (lgrp_snap->ss_size);

		kmem_free(lgrp_snap, lgrp_snap->ss_size);
		lgrp_snap = NULL;
	}

	/*
	 * Allocate memory for snapshot
	 * w/o holding cpu_lock while waiting for memory
	 *
	 * The loop is left through the "break" below with cpu_lock still
	 * held; the lock is dropped only after the snapshot has been filled
	 * in.
	 */
	while (lgrp_snap == NULL) {
		int	old_generation;

		/*
		 * Take snapshot of lgroup generation number
		 * and configuration size dependent information
		 * NOTE: Only count number of online CPUs,
		 * since only online CPUs appear in lgroups.
		 */
		mutex_enter(&cpu_lock);
		old_generation = lgrp_gen;
		snap_ncpus = ncpus_online;
		snap_nlgrps = nlgrps;
		snap_nlgrpsmax = nlgrpsmax;
		mutex_exit(&cpu_lock);

		/*
		 * Calculate size of buffer needed for snapshot,
		 * rounding up size of each object to allow for alignment
		 * of next object in buffer.
		 */
		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
		    sizeof (void *));
		info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
		    sizeof (processorid_t));
		cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
		    sizeof (ulong_t));
		/*
		 * lgroup bitmasks needed for pset lgroup set and  parents,
		 * children, and resource sets for each lgroup
		 */
		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
		    snap_nlgrpsmax) + 1) * bitmask_size;

		/*
		 * Size of latency table and buffer
		 */
		lats_size = snap_nlgrpsmax * sizeof (int *) +
		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);

		bufsize = snap_hdr_size + info_size + cpuids_size +
		    bitmasks_size + lats_size;

		/*
		 * Allocate memory for buffer
		 * (zeroed, so entries that are never filled in below stay 0)
		 */
		lgrp_snap = kmem_zalloc(bufsize, KM_NOSLEEP);
		if (lgrp_snap == NULL)
			return (set_errno(ENOMEM));

		/*
		 * Check whether generation number has changed
		 */
		mutex_enter(&cpu_lock);
		if (lgrp_gen == old_generation)
			break;		/* hasn't changed, so done. */

		/*
		 * Generation number changed, so free memory and try again.
		 */
		mutex_exit(&cpu_lock);
		kmem_free(lgrp_snap, bufsize);
		lgrp_snap = NULL;
	}

	/*
	 * Fill in lgroup snapshot header
	 * (including pointers to tables of lgroup info, CPU IDs, and parents
	 * and children)
	 */
	lgrp_snap->ss_version = LGRP_VER_CURRENT;

	/*
	 * XXX For now, liblgrp only needs to know whether the hierarchy
	 * XXX only has one level or not
	 */
	if (snap_nlgrps == 1)
		lgrp_snap->ss_levels = 1;
	else
		lgrp_snap->ss_levels = 2;

	lgrp_snap->ss_root = LGRP_ROOTID;

	lgrp_snap->ss_nlgrps = lgrp_snap->ss_nlgrps_os = snap_nlgrps;
	lgrp_snap->ss_nlgrps_max = snap_nlgrpsmax;
	lgrp_snap->ss_ncpus = snap_ncpus;
	lgrp_snap->ss_gen = lgrp_gen;
	lgrp_snap->ss_view = LGRP_VIEW_OS;
	lgrp_snap->ss_pset = 0;		/* NOTE: caller should set if needed */
	lgrp_snap->ss_size = bufsize;
	lgrp_snap->ss_magic = (uintptr_t)lgrp_snap;

	/*
	 * Carve the buffer up in layout order: header, lgroup info, CPU IDs,
	 * pset lgroup set, parents, children, resource sets, latencies
	 */
	lgrp_snap->ss_info = lgrp_info =
	    (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);

	lgrp_snap->ss_cpuids = lgrp_cpuids =
	    (processorid_t *)((uintptr_t)lgrp_info + info_size);

	lgrp_snap->ss_lgrpset = lgrpset =
	    (ulong_t *)((uintptr_t)lgrp_cpuids + cpuids_size);

	lgrp_snap->ss_parents = lgrp_parents =
	    (ulong_t *)((uintptr_t)lgrpset + bitmask_size);

	lgrp_snap->ss_children = lgrp_children =
	    (ulong_t *)((uintptr_t)lgrp_parents + (snap_nlgrpsmax *
	    bitmask_size));

	lgrp_snap->ss_rsets = lgrp_rsets =
	    (ulong_t *)((uintptr_t)lgrp_children + (snap_nlgrpsmax *
	    bitmask_size));

	lgrp_snap->ss_latencies = lgrp_lats =
	    (int **)((uintptr_t)lgrp_rsets + (LGRP_RSRC_COUNT *
		snap_nlgrpsmax * bitmask_size));

	/*
	 * Fill in lgroup information
	 */
	cpu_index = 0;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		struct cpu	*cp;
		int		cpu_count;
		struct cpu	*head;
		int		k;
		lgrp_t		*lgrp;

		/*
		 * Mark slots without an lgroup with LGRP_NONE so that they
		 * can be recognized and skipped later
		 */
		lgrp = lgrp_table[i];
		if (!LGRP_EXISTS(lgrp)) {
			bzero(&lgrp_info[i], sizeof (lgrp_info[i]));
			lgrp_info[i].info_lgrpid = LGRP_NONE;
			continue;
		}

		lgrp_info[i].info_lgrpid = i;
		lgrp_info[i].info_latency = lgrp->lgrp_latency;

		/*
		 * Fill in parents, children, and lgroup resources
		 */
		lgrp_info[i].info_parents =
		    (ulong_t *)((uintptr_t)lgrp_parents + (i * bitmask_size));

		if (lgrp->lgrp_parent)
			BT_SET(lgrp_info[i].info_parents,
			    lgrp->lgrp_parent->lgrp_id);

		lgrp_info[i].info_children =
		    (ulong_t *)((uintptr_t)lgrp_children + (i * bitmask_size));

		for (j = 0; j < snap_nlgrpsmax; j++)
			if (klgrpset_ismember(lgrp->lgrp_children, j))
				BT_SET(lgrp_info[i].info_children, j);

		lgrp_info[i].info_rset =
		    (ulong_t *)((uintptr_t)lgrp_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size));

		/*
		 * One bitmask per resource type, stored back to back
		 */
		for (j = 0; j < LGRP_RSRC_COUNT; j++) {
			ulong_t	*rset;

			rset = (ulong_t *)((uintptr_t)lgrp_info[i].info_rset +
			    (j * bitmask_size));
			for (k = 0; k < snap_nlgrpsmax; k++)
				if (klgrpset_ismember(lgrp->lgrp_set[j], k))
					BT_SET(rset, k);
		}

		/*
		 * Fill in CPU IDs
		 * (walk the lgroup's circular CPU list, appending each ID to
		 * the shared CPU ID array)
		 */
		cpu_count = 0;
		lgrp_info[i].info_cpuids = NULL;
		cp = head = lgrp->lgrp_cpu;
		if (head != NULL) {
			lgrp_info[i].info_cpuids = &lgrp_cpuids[cpu_index];
			do {
				lgrp_cpuids[cpu_index] = cp->cpu_id;
				cpu_index++;
				cpu_count++;
				cp = cp->cpu_next_lgrp;
			} while (cp != head);
		}
		ASSERT(cpu_count == lgrp->lgrp_cpucnt);
		lgrp_info[i].info_ncpus = cpu_count;

		/*
		 * Fill in memory sizes for lgroups that directly contain
		 * memory
		 */
		if (klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], i)) {
			lgrp_info[i].info_mem_free =
			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
			lgrp_info[i].info_mem_install =
			    lgrp_mem_size(i, LGRP_MEM_SIZE_INSTALL);
		}

		/*
		 * Fill in latency table and buffer
		 * (row i lives after the array of row pointers; entries for
		 * lgroups that don't exist are left as zero)
		 */
		lgrp_lats[i] = (int *)((uintptr_t)lgrp_lats + snap_nlgrpsmax *
		    sizeof (int *) + i * snap_nlgrpsmax * sizeof (int));
		for (j = 0; j < snap_nlgrpsmax; j++) {
			lgrp_t	*to;

			to = lgrp_table[j];
			if (!LGRP_EXISTS(to))
				continue;
			lgrp_lats[i][j] = lgrp_latency(lgrp->lgrp_id,
			    to->lgrp_id);
		}
	}
	ASSERT(cpu_index == snap_ncpus);


	/*
	 * Drop cpu_lock taken in the allocation loop above
	 */
	mutex_exit(&cpu_lock);

#ifdef	_SYSCALL32_IMPL
	/*
	 * Check to see whether caller is 32-bit program and need to return
	 * size of 32-bit snapshot now that snapshot has been taken/updated.
	 * May not have been able to do this earlier if snapshot was out of
	 * date or didn't exist yet.
	 */
	if (model == DATAMODEL_ILP32) {

		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

		/*
		 * Calculate size of buffer needed for 32-bit snapshot,
		 * rounding up size of each object to allow for alignment
		 * of next object in buffer.
		 */
		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
		    sizeof (caddr32_t));
		info_size =
		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
		    sizeof (processorid_t));
		cpuids_size =
		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
		    sizeof (ulong_t));

		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
		bitmasks_size = (((2 + LGRP_RSRC_COUNT) * snap_nlgrpsmax) +
		    1) * bitmask_size;


		/*
		 * Size of latency table and buffer
		 */
		lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
		    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));

		bufsize = snap_hdr_size + info_size + cpuids_size +
		    bitmasks_size + lats_size;
		return (bufsize);
	}
#endif	/* _SYSCALL32_IMPL */

	return (lgrp_snap->ss_size);
}
   1549 
   1550 
   1551 /*
   1552  * Copy snapshot into given user buffer, fix up any pointers in buffer to point
   1553  * into user instead of kernel address space, and return size of buffer
   1554  * needed to hold snapshot
   1555  */
   1556 static int
   1557 lgrp_snapshot_copy(char *buf, size_t bufsize)
   1558 {
   1559 	size_t			bitmask_size;
   1560 	int			cpu_index;
   1561 	size_t			cpuids_size;
   1562 	int			i;
   1563 	size_t			info_size;
   1564 	lgrp_info_t		*lgrp_info;
   1565 	int			retval;
   1566 	size_t			snap_hdr_size;
   1567 	int			snap_ncpus;
   1568 	int			snap_nlgrpsmax;
   1569 	lgrp_snapshot_header_t	*user_snap;
   1570 	lgrp_info_t		*user_info;
   1571 	lgrp_info_t		*user_info_buffer;
   1572 	processorid_t		*user_cpuids;
   1573 	ulong_t			*user_lgrpset;
   1574 	ulong_t			*user_parents;
   1575 	ulong_t			*user_children;
   1576 	int			**user_lats;
   1577 	int			**user_lats_buffer;
   1578 	ulong_t			*user_rsets;
   1579 
   1580 	if (lgrp_snap == NULL)
   1581 		return (0);
   1582 
   1583 	if (buf == NULL || bufsize <= 0)
   1584 		return (lgrp_snap->ss_size);
   1585 
   1586 	/*
   1587 	 * User needs to try getting size of buffer again
   1588 	 * because given buffer size is too small.
   1589 	 * The lgroup hierarchy may have changed after they asked for the size
   1590 	 * but before the snapshot was taken.
   1591 	 */
   1592 	if (bufsize < lgrp_snap->ss_size)
   1593 		return (set_errno(EAGAIN));
   1594 
   1595 	snap_ncpus = lgrp_snap->ss_ncpus;
   1596 	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
   1597 
   1598 	/*
   1599 	 * Fill in lgrpset now because caller may have change psets
   1600 	 */
   1601 	kpreempt_disable();
   1602 	for (i = 0; i < snap_nlgrpsmax; i++) {
   1603 		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
   1604 		    i)) {
   1605 			BT_SET(lgrp_snap->ss_lgrpset, i);
   1606 		}
   1607 	}
   1608 	kpreempt_enable();
   1609 
   1610 	/*
   1611 	 * Copy lgroup snapshot (snapshot header, lgroup info, and CPU IDs)
   1612 	 * into user buffer all at once
   1613 	 */
   1614 	if (copyout(lgrp_snap, buf, lgrp_snap->ss_size) != 0)
   1615 		return (set_errno(EFAULT));
   1616 
   1617 	/*
   1618 	 * Round up sizes of lgroup snapshot header and info for alignment
   1619 	 */
   1620 	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
   1621 	    sizeof (void *));
   1622 	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
   1623 	    sizeof (processorid_t));
   1624 	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
   1625 	    sizeof (ulong_t));
   1626 
   1627 	bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
   1628 
   1629 	/*
   1630 	 * Calculate pointers into user buffer for lgroup snapshot header,
   1631 	 * info, and CPU IDs
   1632 	 */
   1633 	user_snap = (lgrp_snapshot_header_t *)buf;
   1634 	user_info = (lgrp_info_t *)((uintptr_t)user_snap + snap_hdr_size);
   1635 	user_cpuids = (processorid_t *)((uintptr_t)user_info + info_size);
   1636 	user_lgrpset = (ulong_t *)((uintptr_t)user_cpuids + cpuids_size);
   1637 	user_parents = (ulong_t *)((uintptr_t)user_lgrpset + bitmask_size);
   1638 	user_children = (ulong_t *)((uintptr_t)user_parents +
   1639 	    (snap_nlgrpsmax * bitmask_size));
   1640 	user_rsets = (ulong_t *)((uintptr_t)user_children +
   1641 	    (snap_nlgrpsmax * bitmask_size));
   1642 	user_lats = (int **)((uintptr_t)user_rsets +
   1643 	    (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size));
   1644 
   1645 	/*
   1646 	 * Copyout magic number (ie. pointer to beginning of buffer)
   1647 	 */
   1648 	if (copyout(&buf, &user_snap->ss_magic, sizeof (buf)) != 0)
   1649 		return (set_errno(EFAULT));
   1650 
   1651 	/*
   1652 	 * Fix up pointers in user buffer to point into user buffer
   1653 	 * not kernel snapshot
   1654 	 */
   1655 	if (copyout(&user_info, &user_snap->ss_info, sizeof (user_info)) != 0)
   1656 		return (set_errno(EFAULT));
   1657 
   1658 	if (copyout(&user_cpuids, &user_snap->ss_cpuids,
   1659 	    sizeof (user_cpuids)) != 0)
   1660 		return (set_errno(EFAULT));
   1661 
   1662 	if (copyout(&user_lgrpset, &user_snap->ss_lgrpset,
   1663 	    sizeof (user_lgrpset)) != 0)
   1664 		return (set_errno(EFAULT));
   1665 
   1666 	if (copyout(&user_parents, &user_snap->ss_parents,
   1667 	    sizeof (user_parents)) != 0)
   1668 		return (set_errno(EFAULT));
   1669 
   1670 	if (copyout(&user_children, &user_snap->ss_children,
   1671 	    sizeof (user_children)) != 0)
   1672 		return (set_errno(EFAULT));
   1673 
   1674 	if (copyout(&user_rsets, &user_snap->ss_rsets,
   1675 	    sizeof (user_rsets)) != 0)
   1676 		return (set_errno(EFAULT));
   1677 
   1678 	if (copyout(&user_lats, &user_snap->ss_latencies,
   1679 	    sizeof (user_lats)) != 0)
   1680 		return (set_errno(EFAULT));
   1681 
   1682 	/*
   1683 	 * Make copies of lgroup info and latency table, fix up pointers,
   1684 	 * and then copy them into user buffer
   1685 	 */
   1686 	user_info_buffer = kmem_zalloc(info_size, KM_NOSLEEP);
   1687 	if (user_info_buffer == NULL)
   1688 		return (set_errno(ENOMEM));
   1689 
   1690 	user_lats_buffer = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
   1691 	    KM_NOSLEEP);
   1692 	if (user_lats_buffer == NULL) {
   1693 		kmem_free(user_info_buffer, info_size);
   1694 		return (set_errno(ENOMEM));
   1695 	}
   1696 
   1697 	lgrp_info = (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
   1698 	bcopy(lgrp_info, user_info_buffer, info_size);
   1699 
   1700 	cpu_index = 0;
   1701 	for (i = 0; i < snap_nlgrpsmax; i++) {
   1702 		ulong_t	*snap_rset;
   1703 
   1704 		/*
   1705 		 * Skip non-existent lgroups
   1706 		 */
   1707 		if (user_info_buffer[i].info_lgrpid == LGRP_NONE)
   1708 			continue;
   1709 
   1710 		/*
   1711 		 * Update free memory size since it changes frequently
   1712 		 * Only do so for lgroups directly containing memory
   1713 		 *
   1714 		 * NOTE: This must be done before changing the pointers to
   1715 		 *	 point into user space since we need to dereference
   1716 		 *	 lgroup resource set
   1717 		 */
   1718 		snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
   1719 		    BT_BITOUL(snap_nlgrpsmax)];
   1720 		if (BT_TEST(snap_rset, i))
   1721 			user_info_buffer[i].info_mem_free =
   1722 			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
   1723 
   1724 		/*
   1725 		 * Fix up pointers to parents, children, resources, and
   1726 		 * latencies
   1727 		 */
   1728 		user_info_buffer[i].info_parents =
   1729 		    (ulong_t *)((uintptr_t)user_parents + (i * bitmask_size));
   1730 		user_info_buffer[i].info_children =
   1731 		    (ulong_t *)((uintptr_t)user_children + (i * bitmask_size));
   1732 		user_info_buffer[i].info_rset =
   1733 		    (ulong_t *)((uintptr_t)user_rsets +
   1734 		    (i * LGRP_RSRC_COUNT * bitmask_size));
   1735 		user_lats_buffer[i] = (int *)((uintptr_t)user_lats +
   1736 		    (snap_nlgrpsmax * sizeof (int *)) + (i * snap_nlgrpsmax *
   1737 		    sizeof (int)));
   1738 
   1739 		/*
   1740 		 * Fix up pointer to CPU IDs
   1741 		 */
   1742 		if (user_info_buffer[i].info_ncpus == 0) {
   1743 			user_info_buffer[i].info_cpuids = NULL;
   1744 			continue;
   1745 		}
   1746 		user_info_buffer[i].info_cpuids = &user_cpuids[cpu_index];
   1747 		cpu_index += user_info_buffer[i].info_ncpus;
   1748 	}
   1749 	ASSERT(cpu_index == snap_ncpus);
   1750 
   1751 	/*
   1752 	 * Copy lgroup info and latency table with pointers fixed up to point
   1753 	 * into user buffer out to user buffer now
   1754 	 */
   1755 	retval = lgrp_snap->ss_size;
   1756 	if (copyout(user_info_buffer, user_info, info_size) != 0)
   1757 		retval = set_errno(EFAULT);
   1758 	kmem_free(user_info_buffer, info_size);
   1759 
   1760 	if (copyout(user_lats_buffer, user_lats, snap_nlgrpsmax *
   1761 	    sizeof (int *)) != 0)
   1762 		retval = set_errno(EFAULT);
   1763 	kmem_free(user_lats_buffer, snap_nlgrpsmax * sizeof (int *));
   1764 
   1765 	return (retval);
   1766 }
   1767 
   1768 
   1769 #ifdef	_SYSCALL32_IMPL
   1770 /*
   1771  * Make 32-bit copy of snapshot, fix up any pointers in buffer to point
   1772  * into user instead of kernel address space, copy 32-bit snapshot into
   1773  * given user buffer, and return size of buffer needed to hold snapshot
   1774  */
   1775 static int
   1776 lgrp_snapshot_copy32(caddr32_t buf, size32_t bufsize)
   1777 {
   1778 	size32_t			bitmask_size;
   1779 	size32_t			bitmasks_size;
   1780 	size32_t			children_size;
   1781 	int				cpu_index;
   1782 	size32_t			cpuids_size;
   1783 	int				i;
   1784 	int				j;
   1785 	size32_t			info_size;
   1786 	size32_t			lats_size;
   1787 	lgrp_info_t			*lgrp_info;
   1788 	lgrp_snapshot_header32_t	*lgrp_snap32;
   1789 	lgrp_info32_t			*lgrp_info32;
   1790 	processorid_t			*lgrp_cpuids32;
   1791 	caddr32_t			*lgrp_lats32;
   1792 	int				**lgrp_lats32_kernel;
   1793 	uint_t				*lgrp_set32;
   1794 	uint_t				*lgrp_parents32;
   1795 	uint_t				*lgrp_children32;
   1796 	uint_t				*lgrp_rsets32;
   1797 	size32_t			parents_size;
   1798 	size32_t			rsets_size;
   1799 	size32_t			set_size;
   1800 	size32_t			snap_hdr_size;
   1801 	int				snap_ncpus;
   1802 	int				snap_nlgrpsmax;
   1803 	size32_t			snap_size;
   1804 
   1805 	if (lgrp_snap == NULL)
   1806 		return (0);
   1807 
   1808 	snap_ncpus = lgrp_snap->ss_ncpus;
   1809 	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
   1810 
   1811 	/*
   1812 	 * Calculate size of buffer needed for 32-bit snapshot,
   1813 	 * rounding up size of each object to allow for alignment
   1814 	 * of next object in buffer.
   1815 	 */
   1816 	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
   1817 	    sizeof (caddr32_t));
   1818 	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
   1819 	    sizeof (processorid_t));
   1820 	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
   1821 		    sizeof (ulong_t));
   1822 
   1823 	bitmask_size = BT_SIZEOFMAP32(snap_nlgrpsmax);
   1824 
   1825 	set_size = bitmask_size;
   1826 	parents_size = snap_nlgrpsmax * bitmask_size;
   1827 	children_size = snap_nlgrpsmax * bitmask_size;
   1828 	rsets_size = P2ROUNDUP(LGRP_RSRC_COUNT * snap_nlgrpsmax *
   1829 	    (int)bitmask_size, sizeof (caddr32_t));
   1830 
   1831 	bitmasks_size = set_size + parents_size + children_size + rsets_size;
   1832 
   1833 	/*
   1834 	 * Size of latency table and buffer
   1835 	 */
   1836 	lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
   1837 	    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));
   1838 
   1839 	snap_size = snap_hdr_size + info_size + cpuids_size + bitmasks_size +
   1840 		lats_size;
   1841 
   1842 	if (buf == NULL || bufsize <= 0) {
   1843 		return (snap_size);
   1844 	}
   1845 
   1846 	/*
   1847 	 * User needs to try getting size of buffer again
   1848 	 * because given buffer size is too small.
   1849 	 * The lgroup hierarchy may have changed after they asked for the size
   1850 	 * but before the snapshot was taken.
   1851 	 */
   1852 	if (bufsize < snap_size)
   1853 		return (set_errno(EAGAIN));
   1854 
   1855 	/*
   1856 	 * Make 32-bit copy of snapshot, fix up pointers to point into user
   1857 	 * buffer not kernel, and then copy whole thing into user buffer
   1858 	 */
   1859 	lgrp_snap32 = kmem_zalloc(snap_size, KM_NOSLEEP);
   1860 	if (lgrp_snap32 == NULL)
   1861 		return (set_errno(ENOMEM));
   1862 
   1863 	/*
   1864 	 * Calculate pointers into 32-bit copy of snapshot
   1865 	 * for lgroup info, CPU IDs, pset lgroup bitmask, parents, children,
   1866 	 * resources, and latency table and buffer
   1867 	 */
   1868 	lgrp_info32 = (lgrp_info32_t *)((uintptr_t)lgrp_snap32 +
   1869 	    snap_hdr_size);
   1870 	lgrp_cpuids32 = (processorid_t *)((uintptr_t)lgrp_info32 + info_size);
   1871 	lgrp_set32 = (uint_t *)((uintptr_t)lgrp_cpuids32 + cpuids_size);
   1872 	lgrp_parents32 = (uint_t *)((uintptr_t)lgrp_set32 + set_size);
   1873 	lgrp_children32 = (uint_t *)((uintptr_t)lgrp_parents32 + parents_size);
   1874 	lgrp_rsets32 = (uint_t *)((uintptr_t)lgrp_children32 + children_size);
   1875 	lgrp_lats32 = (caddr32_t *)((uintptr_t)lgrp_rsets32 + rsets_size);
   1876 
   1877 	/*
   1878 	 * Make temporary lgroup latency table of pointers for kernel to use
   1879 	 * to fill in rows of table with latencies from each lgroup
   1880 	 */
   1881 	lgrp_lats32_kernel =  kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
   1882 	    KM_NOSLEEP);
   1883 	if (lgrp_lats32_kernel == NULL) {
   1884 		kmem_free(lgrp_snap32, snap_size);
   1885 		return (set_errno(ENOMEM));
   1886 	}
   1887 
   1888 	/*
   1889 	 * Fill in 32-bit lgroup snapshot header
   1890 	 * (with pointers into user's buffer for lgroup info, CPU IDs,
   1891 	 * bit masks, and latencies)
   1892 	 */
   1893 	lgrp_snap32->ss_version = lgrp_snap->ss_version;
   1894 	lgrp_snap32->ss_levels = lgrp_snap->ss_levels;
   1895 	lgrp_snap32->ss_nlgrps = lgrp_snap32->ss_nlgrps_os =
   1896 	    lgrp_snap->ss_nlgrps;
   1897 	lgrp_snap32->ss_nlgrps_max = snap_nlgrpsmax;
   1898 	lgrp_snap32->ss_root = lgrp_snap->ss_root;
   1899 	lgrp_snap32->ss_ncpus = lgrp_snap->ss_ncpus;
   1900 	lgrp_snap32->ss_gen = lgrp_snap->ss_gen;
   1901 	lgrp_snap32->ss_view = LGRP_VIEW_OS;
   1902 	lgrp_snap32->ss_size = snap_size;
   1903 	lgrp_snap32->ss_magic = buf;
   1904 	lgrp_snap32->ss_info = buf + snap_hdr_size;
   1905 	lgrp_snap32->ss_cpuids = lgrp_snap32->ss_info + info_size;
   1906 	lgrp_snap32->ss_lgrpset = lgrp_snap32->ss_cpuids + cpuids_size;
   1907 	lgrp_snap32->ss_parents = lgrp_snap32->ss_lgrpset + bitmask_size;
   1908 	lgrp_snap32->ss_children = lgrp_snap32->ss_parents +
   1909 	    (snap_nlgrpsmax * bitmask_size);
   1910 	lgrp_snap32->ss_rsets = lgrp_snap32->ss_children +
   1911 	    (snap_nlgrpsmax * bitmask_size);
   1912 	lgrp_snap32->ss_latencies = lgrp_snap32->ss_rsets +
   1913 	    (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size);
   1914 
   1915 	/*
   1916 	 * Fill in lgrpset now because caller may have change psets
   1917 	 */
   1918 	kpreempt_disable();
   1919 	for (i = 0; i < snap_nlgrpsmax; i++) {
   1920 		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
   1921 		    i)) {
   1922 			BT_SET32(lgrp_set32, i);
   1923 		}
   1924 	}
   1925 	kpreempt_enable();
   1926 
   1927 	/*
   1928 	 * Fill in 32-bit copy of lgroup info and fix up pointers
   1929 	 * to point into user's buffer instead of kernel's
   1930 	 */
   1931 	cpu_index = 0;
   1932 	lgrp_info = lgrp_snap->ss_info;
   1933 	for (i = 0; i < snap_nlgrpsmax; i++) {
   1934 		uint_t	*children;
   1935 		uint_t	*lgrp_rset;
   1936 		uint_t	*parents;
   1937 		ulong_t	*snap_rset;
   1938 
   1939 		/*
   1940 		 * Skip non-existent lgroups
   1941 		 */
   1942 		if (lgrp_info[i].info_lgrpid == LGRP_NONE) {
   1943 			bzero(&lgrp_info32[i], sizeof (lgrp_info32[i]));
   1944 			lgrp_info32[i].info_lgrpid = LGRP_NONE;
   1945 			continue;
   1946 		}
   1947 
   1948 		/*
   1949 		 * Fill in parents, children, lgroup resource set, and
   1950 		 * latencies from snapshot
   1951 		 */
   1952 		parents = (uint_t *)((uintptr_t)lgrp_parents32 +
   1953 		    i * bitmask_size);
   1954 		children = (uint_t *)((uintptr_t)lgrp_children32 +
   1955 		    i * bitmask_size);
   1956 		snap_rset = (ulong_t *)((uintptr_t)lgrp_snap->ss_rsets +
   1957 		    (i * LGRP_RSRC_COUNT * BT_SIZEOFMAP(snap_nlgrpsmax)));
   1958 		lgrp_rset = (uint_t *)((uintptr_t)lgrp_rsets32 +
   1959 		    (i * LGRP_RSRC_COUNT * bitmask_size));
   1960 		lgrp_lats32_kernel[i] = (int *)((uintptr_t)lgrp_lats32 +
   1961 		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
   1962 		    sizeof (int));
   1963 		for (j = 0; j < snap_nlgrpsmax; j++) {
   1964 			int	k;
   1965 			uint_t	*rset;
   1966 
   1967 			if (BT_TEST(&lgrp_snap->ss_parents[i], j))
   1968 				BT_SET32(parents, j);
   1969 
   1970 			if (BT_TEST(&lgrp_snap->ss_children[i], j))
   1971 				BT_SET32(children, j);
   1972 
   1973 			for (k = 0; k < LGRP_RSRC_COUNT; k++) {
   1974 				rset = (uint_t *)((uintptr_t)lgrp_rset +
   1975 				    k * bitmask_size);
   1976 				if (BT_TEST(&snap_rset[k], j))
   1977 					BT_SET32(rset, j);
   1978 			}
   1979 
   1980 			lgrp_lats32_kernel[i][j] =
   1981 			    lgrp_snap->ss_latencies[i][j];
   1982 		}
   1983 
   1984 		/*
   1985 		 * Fix up pointer to latency buffer
   1986 		 */
   1987 		lgrp_lats32[i] = lgrp_snap32->ss_latencies +
   1988 		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
   1989 		    sizeof (int);
   1990 
   1991 		/*
   1992 		 * Fix up pointers for parents, children, and resources
   1993 		 */
   1994 		lgrp_info32[i].info_parents = lgrp_snap32->ss_parents +
   1995 		    (i * bitmask_size);
   1996 		lgrp_info32[i].info_children = lgrp_snap32->ss_children +
   1997 		    (i * bitmask_size);
   1998 		lgrp_info32[i].info_rset = lgrp_snap32->ss_rsets +
   1999 		    (i * LGRP_RSRC_COUNT * bitmask_size);
   2000 
   2001 		/*
   2002 		 * Fill in memory and CPU info
   2003 		 * Only fill in memory for lgroups directly containing memory
   2004 		 */
   2005 		snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
   2006 		    BT_BITOUL(snap_nlgrpsmax)];
   2007 		if (BT_TEST(snap_rset, i)) {
   2008 			lgrp_info32[i].info_mem_free = lgrp_mem_size(i,
   2009 			    LGRP_MEM_SIZE_FREE);
   2010 			lgrp_info32[i].info_mem_install =
   2011 			    lgrp_info[i].info_mem_install;
   2012 		}
   2013 
   2014 		lgrp_info32[i].info_ncpus = lgrp_info[i].info_ncpus;
   2015 
   2016 		lgrp_info32[i].info_lgrpid = lgrp_info[i].info_lgrpid;
   2017 		lgrp_info32[i].info_latency = lgrp_info[i].info_latency;
   2018 
   2019 		if (lgrp_info32[i].info_ncpus == 0) {
   2020 			lgrp_info32[i].info_cpuids = 0;
   2021 			continue;
   2022 		}
   2023 
   2024 		/*
   2025 		 * Fix up pointer for CPU IDs
   2026 		 */
   2027 		lgrp_info32[i].info_cpuids = lgrp_snap32->ss_cpuids +
   2028 		    (cpu_index * sizeof (processorid_t));
   2029 		cpu_index += lgrp_info32[i].info_ncpus;
   2030 	}
   2031 	ASSERT(cpu_index == snap_ncpus);
   2032 
   2033 	/*
   2034 	 * Copy lgroup CPU IDs into 32-bit snapshot
   2035 	 * before copying it out into user's buffer
   2036 	 */
   2037 	bcopy(lgrp_snap->ss_cpuids, lgrp_cpuids32, cpuids_size);
   2038 
   2039 	/*
   2040 	 * Copy 32-bit lgroup snapshot into user's buffer all at once
   2041 	 */
   2042 	if (copyout(lgrp_snap32, (void *)(uintptr_t)buf, snap_size) != 0) {
   2043 		kmem_free(lgrp_snap32, snap_size);
   2044 		kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));
   2045 		return (set_errno(EFAULT));
   2046 	}
   2047 
   2048 	kmem_free(lgrp_snap32, snap_size);
   2049 	kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));
   2050 
   2051 	return (snap_size);
   2052 }
   2053 #endif	/* _SYSCALL32_IMPL */
   2054 
   2055 
   2056 int
   2057 lgrpsys(int subcode, long ia, void *ap)
   2058 {
   2059 	size_t	bufsize;
   2060 	int	latency;
   2061 
   2062 	switch (subcode) {
   2063 
   2064 	case LGRP_SYS_AFFINITY_GET:
   2065 		return (lgrp_affinity_get((lgrp_affinity_args_t *)ap));
   2066 
   2067 	case LGRP_SYS_AFFINITY_SET:
   2068 		return (lgrp_affinity_set((lgrp_affinity_args_t *)ap));
   2069 
   2070 	case LGRP_SYS_GENERATION:
   2071 		return (lgrp_generation(ia));
   2072 
   2073 	case LGRP_SYS_HOME:
   2074 		return (lgrp_home_get((idtype_t)ia, (id_t)(uintptr_t)ap));
   2075 
   2076 	case LGRP_SYS_LATENCY:
   2077 		mutex_enter(&cpu_lock);
   2078 		latency = lgrp_latency(ia, (lgrp_id_t)(uintptr_t)ap);
   2079 		mutex_exit(&cpu_lock);
   2080 		return (latency);
   2081 
   2082 	case LGRP_SYS_MEMINFO:
   2083 		return (meminfo(ia, (struct meminfo *)ap));
   2084 
   2085 	case LGRP_SYS_VERSION:
   2086 		return (lgrp_version(ia));
   2087 
   2088 	case LGRP_SYS_SNAPSHOT:
   2089 		mutex_enter(&lgrp_snap_lock);
   2090 		bufsize = lgrp_snapshot();
   2091 		if (ap && ia > 0) {
   2092 			if (get_udatamodel() == DATAMODEL_NATIVE)
   2093 				bufsize = lgrp_snapshot_copy(ap, ia);
   2094 #ifdef	_SYSCALL32_IMPL
   2095 			else
   2096 				bufsize = lgrp_snapshot_copy32(
   2097 				    (caddr32_t)(uintptr_t)ap, ia);
   2098 #endif	/* _SYSCALL32_IMPL */
   2099 		}
   2100 		mutex_exit(&lgrp_snap_lock);
   2101 		return (bufsize);
   2102 
   2103 	default:
   2104 		break;
   2105 
   2106 	}
   2107 
   2108 	return (set_errno(EINVAL));
   2109 }
   2110