Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * Architecture-independent CPU control functions.
     28  */
     29 
     30 #include <sys/types.h>
     31 #include <sys/param.h>
     32 #include <sys/var.h>
     33 #include <sys/thread.h>
     34 #include <sys/cpuvar.h>
     35 #include <sys/cpu_event.h>
     36 #include <sys/kstat.h>
     37 #include <sys/uadmin.h>
     38 #include <sys/systm.h>
     39 #include <sys/errno.h>
     40 #include <sys/cmn_err.h>
     41 #include <sys/procset.h>
     42 #include <sys/processor.h>
     43 #include <sys/debug.h>
     44 #include <sys/cpupart.h>
     45 #include <sys/lgrp.h>
     46 #include <sys/pset.h>
     47 #include <sys/pghw.h>
     48 #include <sys/kmem.h>
     49 #include <sys/kmem_impl.h>	/* to set per-cpu kmem_cache offset */
     50 #include <sys/atomic.h>
     51 #include <sys/callb.h>
     52 #include <sys/vtrace.h>
     53 #include <sys/cyclic.h>
     54 #include <sys/bitmap.h>
     55 #include <sys/nvpair.h>
     56 #include <sys/pool_pset.h>
     57 #include <sys/msacct.h>
     58 #include <sys/time.h>
     59 #include <sys/archsystm.h>
     60 #include <sys/sdt.h>
     61 #if defined(__x86) || defined(__amd64)
     62 #include <sys/x86_archext.h>
     63 #endif
     64 #include <sys/callo.h>
     65 
        /*
         * CPU start/stop, power and configuration hooks.  They are only
         * declared here; presumably the platform (mp_*) code implements them.
         */
     66 extern int	mp_cpu_start(cpu_t *);
     67 extern int	mp_cpu_stop(cpu_t *);
     68 extern int	mp_cpu_poweron(cpu_t *);
     69 extern int	mp_cpu_poweroff(cpu_t *);
     70 extern int	mp_cpu_configure(int);
     71 extern int	mp_cpu_unconfigure(int);
     72 extern void	mp_cpu_faulted_enter(cpu_t *);
     73 extern void	mp_cpu_faulted_exit(cpu_t *);
     74 
     75 extern int cmp_cpu_to_chip(processorid_t cpuid);
     76 #ifdef __sparcv9
     77 extern char *cpu_fru_fmri(cpu_t *cp);	/* only declared on sparcv9 */
     78 #endif
     79 
        /* Forward declarations of file-local helpers. */
     80 static void cpu_add_active_internal(cpu_t *cp);
     81 static void cpu_remove_active(cpu_t *cp);
     82 static void cpu_info_kstat_create(cpu_t *cp);
     83 static void cpu_info_kstat_destroy(cpu_t *cp);
     84 static void cpu_stats_kstat_create(cpu_t *cp);
     85 static void cpu_stats_kstat_destroy(cpu_t *cp);
     86 
        /* Forward declarations of the kstat update callbacks and hook runner. */
     87 static int cpu_sys_stats_ks_update(kstat_t *ksp, int rw);
     88 static int cpu_vm_stats_ks_update(kstat_t *ksp, int rw);
     89 static int cpu_stat_ks_update(kstat_t *ksp, int rw);
     90 static int cpu_state_change_hooks(int, cpu_setup_t, cpu_setup_t);
     91 
     92 /*
     93  * cpu_lock protects ncpus, ncpus_online, cpu_flag, cpu_list, cpu_active,
     94  * and dispatch queue reallocations.  The lock ordering with respect to
     95  * related locks is:
     96  *
     97  *	cpu_lock --> thread_free_lock  --->  p_lock  --->  thread_lock()
     98  *
     99  * Warning:  Certain sections of code do not use the cpu_lock when
    100  * traversing the cpu_list (e.g. mutex_vector_enter(), clock()).  Since
    101  * all cpus are paused during modifications to this list, a solution
    102  * to protect the list is to either disable kernel preemption while
    103  * walking the list, *or* recheck the cpu_next pointer at each
    104  * iteration in the loop.  Note that in no cases can any cached
    105  * copies of the cpu pointers be kept as they may become invalid.
    106  */
    107 kmutex_t	cpu_lock;
    108 cpu_t		*cpu_list;		/* list of all CPUs */
    109 cpu_t		*clock_cpu_list;	/* used by clock to walk CPUs */
    110 cpu_t		*cpu_active;		/* list of active CPUs */
    111 static cpuset_t	cpu_available;		/* set of available CPUs */
    112 cpuset_t	cpu_seqid_inuse;	/* which cpu_seqids are in use */
    113 
    114 cpu_t		**cpu_seq;		/* ptrs to CPUs, indexed by seq_id */
    115 
    116 /*
    117  * max_ncpus keeps the max cpus the system can have. Initially
    118  * it's NCPU, but since most archs scan the devtree for cpus
    119  * fairly early on during boot, the real max can be known before
    120  * ncpus is set (useful for early NCPU based allocations).
    121  */
    122 int max_ncpus = NCPU;
    123 /*
    124  * Platforms that set max_ncpus to the maximum number of cpus that can be
    125  * dynamically added will set boot_max_ncpus to the number of cpus found
    126  * at device tree scan time during boot.
    127  */
    128 int boot_max_ncpus = -1;
    129 int boot_ncpus = -1;
    130 /*
    131  * Maximum possible CPU id.  This can never be >= NCPU since NCPU is
    132  * used to size arrays that are indexed by CPU id.
    133  */
    134 processorid_t max_cpuid = NCPU - 1;
    135 
    136 int ncpus = 1;
    137 int ncpus_online = 1;
    138 
    139 /*
    140  * CPU that we're trying to offline.  Protected by cpu_lock.
    141  */
    142 cpu_t *cpu_inmotion;
    143 
    144 /*
    145  * Can be raised to suppress further weakbinding, which are instead
    146  * satisfied by disabling preemption.  Must be raised/lowered under cpu_lock,
    147  * while individual thread weakbinding synchronization is done under thread
    148  * lock.
    149  */
    150 int weakbindingbarrier;
    151 
    152 /*
    153  * Variables used in pause_cpus().
    154  *
        * safe_list holds one pause-state byte per CPU id (PAUSE_IDLE,
        * PAUSE_READY, PAUSE_DIE, PAUSE_DEAD); cpu_pause() and start_cpus()
        * both store to it.
        */
    155 static volatile char safe_list[NCPU];
    156 
    157 static struct _cpu_pause_info {
    158 	int		cp_spl;		/* spl saved in pause_cpus() */
    159 	volatile int	cp_go;		/* Go signal sent after all ready */
    160 	int		cp_count;	/* # of CPUs to pause */
    161 	ksema_t		cp_sem;		/* synch pause_cpus & cpu_pause */
    162 	kthread_id_t	cp_paused;	/* non-NULL while paused; start_cpus() clears it */
    163 } cpu_pause_info;
    164 
        /*
         * cpu_pause() stores PAUSE_DEAD and broadcasts pause_free_cv while
         * holding pause_free_mutex, so a waiter can learn that a pause thread
         * has left its loop.
         */
    165 static kmutex_t pause_free_mutex;
    166 static kcondvar_t pause_free_cv;
    167 
        /*
         * Optional hook run by each pause thread (with its CPU index as the
         * argument) just before it parks; NULL means no hook.
         */
    168 void *(*cpu_pause_func)(void *) = NULL;
    169 
    170 
    171 static struct cpu_sys_stats_ks_data {
    172 	kstat_named_t cpu_ticks_idle;
    173 	kstat_named_t cpu_ticks_user;
    174 	kstat_named_t cpu_ticks_kernel;
    175 	kstat_named_t cpu_ticks_wait;
    176 	kstat_named_t cpu_nsec_idle;
    177 	kstat_named_t cpu_nsec_user;
    178 	kstat_named_t cpu_nsec_kernel;
    179 	kstat_named_t cpu_nsec_intr;
    180 	kstat_named_t cpu_load_intr;
    181 	kstat_named_t wait_ticks_io;
    182 	kstat_named_t bread;
    183 	kstat_named_t bwrite;
    184 	kstat_named_t lread;
    185 	kstat_named_t lwrite;
    186 	kstat_named_t phread;
    187 	kstat_named_t phwrite;
    188 	kstat_named_t pswitch;
    189 	kstat_named_t trap;
    190 	kstat_named_t intr;
    191 	kstat_named_t syscall;
    192 	kstat_named_t sysread;
    193 	kstat_named_t syswrite;
    194 	kstat_named_t sysfork;
    195 	kstat_named_t sysvfork;
    196 	kstat_named_t sysexec;
    197 	kstat_named_t readch;
    198 	kstat_named_t writech;
    199 	kstat_named_t rcvint;
    200 	kstat_named_t xmtint;
    201 	kstat_named_t mdmint;
    202 	kstat_named_t rawch;
    203 	kstat_named_t canch;
    204 	kstat_named_t outch;
    205 	kstat_named_t msg;
    206 	kstat_named_t sema;
    207 	kstat_named_t namei;
    208 	kstat_named_t ufsiget;
    209 	kstat_named_t ufsdirblk;
    210 	kstat_named_t ufsipage;
    211 	kstat_named_t ufsinopage;
    212 	kstat_named_t procovf;
    213 	kstat_named_t intrthread;
    214 	kstat_named_t intrblk;
    215 	kstat_named_t intrunpin;
    216 	kstat_named_t idlethread;
    217 	kstat_named_t inv_swtch;
    218 	kstat_named_t nthreads;
    219 	kstat_named_t cpumigrate;
    220 	kstat_named_t xcalls;
    221 	kstat_named_t mutex_adenters;
    222 	kstat_named_t rw_rdfails;
    223 	kstat_named_t rw_wrfails;
    224 	kstat_named_t modload;
    225 	kstat_named_t modunload;
    226 	kstat_named_t bawrite;
    227 	kstat_named_t iowait;
    228 } cpu_sys_stats_ks_data_template = {
    229 	{ "cpu_ticks_idle", 	KSTAT_DATA_UINT64 },
    230 	{ "cpu_ticks_user", 	KSTAT_DATA_UINT64 },
    231 	{ "cpu_ticks_kernel", 	KSTAT_DATA_UINT64 },
    232 	{ "cpu_ticks_wait", 	KSTAT_DATA_UINT64 },
    233 	{ "cpu_nsec_idle",	KSTAT_DATA_UINT64 },
    234 	{ "cpu_nsec_user",	KSTAT_DATA_UINT64 },
    235 	{ "cpu_nsec_kernel",	KSTAT_DATA_UINT64 },
    236 	{ "cpu_nsec_intr",	KSTAT_DATA_UINT64 },
    237 	{ "cpu_load_intr",	KSTAT_DATA_UINT64 },
    238 	{ "wait_ticks_io", 	KSTAT_DATA_UINT64 },
    239 	{ "bread", 		KSTAT_DATA_UINT64 },
    240 	{ "bwrite", 		KSTAT_DATA_UINT64 },
    241 	{ "lread", 		KSTAT_DATA_UINT64 },
    242 	{ "lwrite", 		KSTAT_DATA_UINT64 },
    243 	{ "phread", 		KSTAT_DATA_UINT64 },
    244 	{ "phwrite", 		KSTAT_DATA_UINT64 },
    245 	{ "pswitch", 		KSTAT_DATA_UINT64 },
    246 	{ "trap", 		KSTAT_DATA_UINT64 },
    247 	{ "intr", 		KSTAT_DATA_UINT64 },
    248 	{ "syscall", 		KSTAT_DATA_UINT64 },
    249 	{ "sysread", 		KSTAT_DATA_UINT64 },
    250 	{ "syswrite", 		KSTAT_DATA_UINT64 },
    251 	{ "sysfork", 		KSTAT_DATA_UINT64 },
    252 	{ "sysvfork", 		KSTAT_DATA_UINT64 },
    253 	{ "sysexec", 		KSTAT_DATA_UINT64 },
    254 	{ "readch", 		KSTAT_DATA_UINT64 },
    255 	{ "writech", 		KSTAT_DATA_UINT64 },
    256 	{ "rcvint", 		KSTAT_DATA_UINT64 },
    257 	{ "xmtint", 		KSTAT_DATA_UINT64 },
    258 	{ "mdmint", 		KSTAT_DATA_UINT64 },
    259 	{ "rawch", 		KSTAT_DATA_UINT64 },
    260 	{ "canch", 		KSTAT_DATA_UINT64 },
    261 	{ "outch", 		KSTAT_DATA_UINT64 },
    262 	{ "msg", 		KSTAT_DATA_UINT64 },
    263 	{ "sema", 		KSTAT_DATA_UINT64 },
    264 	{ "namei", 		KSTAT_DATA_UINT64 },
    265 	{ "ufsiget", 		KSTAT_DATA_UINT64 },
    266 	{ "ufsdirblk", 		KSTAT_DATA_UINT64 },
    267 	{ "ufsipage", 		KSTAT_DATA_UINT64 },
    268 	{ "ufsinopage", 	KSTAT_DATA_UINT64 },
    269 	{ "procovf", 		KSTAT_DATA_UINT64 },
    270 	{ "intrthread", 	KSTAT_DATA_UINT64 },
    271 	{ "intrblk", 		KSTAT_DATA_UINT64 },
    272 	{ "intrunpin",		KSTAT_DATA_UINT64 },
    273 	{ "idlethread", 	KSTAT_DATA_UINT64 },
    274 	{ "inv_swtch", 		KSTAT_DATA_UINT64 },
    275 	{ "nthreads", 		KSTAT_DATA_UINT64 },
    276 	{ "cpumigrate", 	KSTAT_DATA_UINT64 },
    277 	{ "xcalls", 		KSTAT_DATA_UINT64 },
    278 	{ "mutex_adenters", 	KSTAT_DATA_UINT64 },
    279 	{ "rw_rdfails", 	KSTAT_DATA_UINT64 },
    280 	{ "rw_wrfails", 	KSTAT_DATA_UINT64 },
    281 	{ "modload", 		KSTAT_DATA_UINT64 },
    282 	{ "modunload", 		KSTAT_DATA_UINT64 },
    283 	{ "bawrite", 		KSTAT_DATA_UINT64 },
    284 	{ "iowait",		KSTAT_DATA_UINT64 },
    285 };
    286 
    287 static struct cpu_vm_stats_ks_data {
    288 	kstat_named_t pgrec;
    289 	kstat_named_t pgfrec;
    290 	kstat_named_t pgin;
    291 	kstat_named_t pgpgin;
    292 	kstat_named_t pgout;
    293 	kstat_named_t pgpgout;
    294 	kstat_named_t swapin;
    295 	kstat_named_t pgswapin;
    296 	kstat_named_t swapout;
    297 	kstat_named_t pgswapout;
    298 	kstat_named_t zfod;
    299 	kstat_named_t dfree;
    300 	kstat_named_t scan;
    301 	kstat_named_t rev;
    302 	kstat_named_t hat_fault;
    303 	kstat_named_t as_fault;
    304 	kstat_named_t maj_fault;
    305 	kstat_named_t cow_fault;
    306 	kstat_named_t prot_fault;
    307 	kstat_named_t softlock;
    308 	kstat_named_t kernel_asflt;
    309 	kstat_named_t pgrrun;
    310 	kstat_named_t execpgin;
    311 	kstat_named_t execpgout;
    312 	kstat_named_t execfree;
    313 	kstat_named_t anonpgin;
    314 	kstat_named_t anonpgout;
    315 	kstat_named_t anonfree;
    316 	kstat_named_t fspgin;
    317 	kstat_named_t fspgout;
    318 	kstat_named_t fsfree;
    319 } cpu_vm_stats_ks_data_template = {
    320 	{ "pgrec",		KSTAT_DATA_UINT64 },
    321 	{ "pgfrec",		KSTAT_DATA_UINT64 },
    322 	{ "pgin",		KSTAT_DATA_UINT64 },
    323 	{ "pgpgin",		KSTAT_DATA_UINT64 },
    324 	{ "pgout",		KSTAT_DATA_UINT64 },
    325 	{ "pgpgout",		KSTAT_DATA_UINT64 },
    326 	{ "swapin",		KSTAT_DATA_UINT64 },
    327 	{ "pgswapin",		KSTAT_DATA_UINT64 },
    328 	{ "swapout",		KSTAT_DATA_UINT64 },
    329 	{ "pgswapout",		KSTAT_DATA_UINT64 },
    330 	{ "zfod",		KSTAT_DATA_UINT64 },
    331 	{ "dfree",		KSTAT_DATA_UINT64 },
    332 	{ "scan",		KSTAT_DATA_UINT64 },
    333 	{ "rev",		KSTAT_DATA_UINT64 },
    334 	{ "hat_fault",		KSTAT_DATA_UINT64 },
    335 	{ "as_fault",		KSTAT_DATA_UINT64 },
    336 	{ "maj_fault",		KSTAT_DATA_UINT64 },
    337 	{ "cow_fault",		KSTAT_DATA_UINT64 },
    338 	{ "prot_fault",		KSTAT_DATA_UINT64 },
    339 	{ "softlock",		KSTAT_DATA_UINT64 },
    340 	{ "kernel_asflt",	KSTAT_DATA_UINT64 },
    341 	{ "pgrrun",		KSTAT_DATA_UINT64 },
    342 	{ "execpgin",		KSTAT_DATA_UINT64 },
    343 	{ "execpgout",		KSTAT_DATA_UINT64 },
    344 	{ "execfree",		KSTAT_DATA_UINT64 },
    345 	{ "anonpgin",		KSTAT_DATA_UINT64 },
    346 	{ "anonpgout",		KSTAT_DATA_UINT64 },
    347 	{ "anonfree",		KSTAT_DATA_UINT64 },
    348 	{ "fspgin",		KSTAT_DATA_UINT64 },
    349 	{ "fspgout",		KSTAT_DATA_UINT64 },
    350 	{ "fsfree",		KSTAT_DATA_UINT64 },
    351 };
    352 
    353 /*
    354  * Force the specified thread to migrate to the appropriate processor.
    355  * Called with thread lock held, returns with it dropped.
    356  */
    357 static void
    358 force_thread_migrate(kthread_id_t tp)
    359 {
    360 	ASSERT(THREAD_LOCK_HELD(tp));
    361 	if (tp == curthread) {
    362 		THREAD_TRANSITION(tp);
    363 		CL_SETRUN(tp);
    364 		thread_unlock_nopreempt(tp);
    365 		swtch();
    366 	} else {
    367 		if (tp->t_state == TS_ONPROC) {
    368 			cpu_surrender(tp);
    369 		} else if (tp->t_state == TS_RUN) {
    370 			(void) dispdeq(tp);
    371 			setbackdq(tp);
    372 		}
    373 		thread_unlock(tp);
    374 	}
    375 }
    376 
    377 /*
    378  * Set affinity for a specified CPU.
    379  * A reference count is incremented and the affinity is held until the
    380  * reference count is decremented to zero by thread_affinity_clear().
    381  * This is so regions of code requiring affinity can be nested.
    382  * Caller needs to ensure that cpu_id remains valid, which can be
    383  * done by holding cpu_lock across this call, unless the caller
    384  * specifies CPU_CURRENT in which case the cpu_lock will be acquired
    385  * by thread_affinity_set and CPU->cpu_id will be the target CPU.
    386  */
    387 void
    388 thread_affinity_set(kthread_id_t t, int cpu_id)
    389 {
    390 	cpu_t		*cp;
    391 	int		c;
    392 
    393 	ASSERT(!(t == curthread && t->t_weakbound_cpu != NULL));
    394 
    395 	if ((c = cpu_id) == CPU_CURRENT) {
    396 		mutex_enter(&cpu_lock);
    397 		cpu_id = CPU->cpu_id;
    398 	}
    399 	/*
    400 	 * We should be asserting that cpu_lock is held here, but
    401 	 * the NCA code doesn't acquire it.  The following assert
    402 	 * should be uncommented when the NCA code is fixed.
    403 	 *
    404 	 * ASSERT(MUTEX_HELD(&cpu_lock));
    405 	 */
    406 	ASSERT((cpu_id >= 0) && (cpu_id < NCPU));
    407 	cp = cpu[cpu_id];
    408 	ASSERT(cp != NULL);		/* user must provide a good cpu_id */
    409 	/*
    410 	 * If there is already a hard affinity requested, and this affinity
    411 	 * conflicts with that, panic.
    412 	 */
    413 	thread_lock(t);
    414 	if (t->t_affinitycnt > 0 && t->t_bound_cpu != cp) {
    415 		panic("affinity_set: setting %p but already bound to %p",
    416 		    (void *)cp, (void *)t->t_bound_cpu);
    417 	}
    418 	t->t_affinitycnt++;
    419 	t->t_bound_cpu = cp;
    420 
    421 	/*
    422 	 * Make sure we're running on the right CPU.
    423 	 */
    424 	if (cp != t->t_cpu || t != curthread) {
    425 		force_thread_migrate(t);	/* drops thread lock */
    426 	} else {
    427 		thread_unlock(t);
    428 	}
    429 
    430 	if (c == CPU_CURRENT)
    431 		mutex_exit(&cpu_lock);
    432 }
    433 
    434 /*
    435  *	Wrapper for backward compatibility.
    436  */
    437 void
    438 affinity_set(int cpu_id)
    439 {
    440 	thread_affinity_set(curthread, cpu_id);
    441 }
    442 
    443 /*
    444  * Decrement the affinity reservation count and if it becomes zero,
    445  * clear the CPU affinity for the current thread, or set it to the user's
    446  * software binding request.
    447  */
    448 void
    449 thread_affinity_clear(kthread_id_t t)
    450 {
    451 	register processorid_t binding;
    452 
    453 	thread_lock(t);
    454 	if (--t->t_affinitycnt == 0) {
    455 		if ((binding = t->t_bind_cpu) == PBIND_NONE) {
    456 			/*
    457 			 * Adjust disp_max_unbound_pri if necessary.
    458 			 */
    459 			disp_adjust_unbound_pri(t);
    460 			t->t_bound_cpu = NULL;
    461 			if (t->t_cpu->cpu_part != t->t_cpupart) {
    462 				force_thread_migrate(t);
    463 				return;
    464 			}
    465 		} else {
    466 			t->t_bound_cpu = cpu[binding];
    467 			/*
    468 			 * Make sure the thread is running on the bound CPU.
    469 			 */
    470 			if (t->t_cpu != t->t_bound_cpu) {
    471 				force_thread_migrate(t);
    472 				return;		/* already dropped lock */
    473 			}
    474 		}
    475 	}
    476 	thread_unlock(t);
    477 }
    478 
    479 /*
    480  * Wrapper for backward compatibility.
    481  */
    482 void
    483 affinity_clear(void)
    484 {
    485 	thread_affinity_clear(curthread);
    486 }
    487 
    488 /*
    489  * Weak cpu affinity.  Bind to the "current" cpu for short periods
    490  * of time during which the thread must not block (but may be preempted).
    491  * Use this instead of kpreempt_disable() when it is only "no migration"
    492  * rather than "no preemption" semantics that are required - disabling
    493  * preemption holds higher priority threads off of cpu and if the
    494  * operation that is protected is more than momentary this is not good
    495  * for realtime etc.
    496  *
    497  * Weakly bound threads will not prevent a cpu from being offlined -
    498  * we'll only run them on the cpu to which they are weakly bound but
    499  * (because they do not block) we'll always be able to move them on to
    500  * another cpu at offline time if we give them just a short moment to
    501  * run during which they will unbind.  To give a cpu a chance of offlining,
    502  * however, we require a barrier to weak bindings that may be raised for a
    503  * given cpu (offline/move code may set this and then wait a short time for
    504  * existing weak bindings to drop); the cpu_inmotion pointer is that barrier.
    505  *
    506  * There are few restrictions on the calling context of thread_nomigrate.
    507  * The caller must not hold the thread lock.  Calls may be nested.
    508  *
    509  * After weakbinding a thread must not perform actions that may block.
    510  * In particular it must not call thread_affinity_set; calling that when
    511  * already weakbound is nonsensical anyway.
    512  *
    513  * If curthread is prevented from migrating for other reasons
    514  * (kernel preemption disabled; high pil; strongly bound; interrupt thread)
    515  * then the weak binding will succeed even if this cpu is the target of an
    516  * offline/move request.
    517  */
    518 void
    519 thread_nomigrate(void)
    520 {
    521 	cpu_t *cp;
    522 	kthread_id_t t = curthread;
    523 
    524 again:
    525 	kpreempt_disable();
    526 	cp = CPU;
    527 
    528 	/*
    529 	 * A highlevel interrupt must not modify t_nomigrate or
    530 	 * t_weakbound_cpu of the thread it has interrupted.  A lowlevel
    531 	 * interrupt thread cannot migrate and we can avoid the
    532 	 * thread_lock call below by short-circuiting here.  In either
    533 	 * case we can just return since no migration is possible and
    534 	 * the condition will persist (ie, when we test for these again
    535 	 * in thread_allowmigrate they can't have changed).   Migration
    536 	 * is also impossible if we're at or above DISP_LEVEL pil.
    537 	 */
    538 	if (CPU_ON_INTR(cp) || t->t_flag & T_INTR_THREAD ||
    539 	    getpil() >= DISP_LEVEL) {
    540 		kpreempt_enable();
    541 		return;
    542 	}
    543 
    544 	/*
    545 	 * We must be consistent with existing weak bindings.  Since we
    546 	 * may be interrupted between the increment of t_nomigrate and
    547 	 * the store to t_weakbound_cpu below we cannot assume that
    548 	 * t_weakbound_cpu will be set if t_nomigrate is.  Note that we
    549 	 * cannot assert t_weakbound_cpu == t_bind_cpu since that is not
    550 	 * always the case.
    551 	 */
    552 	if (t->t_nomigrate && t->t_weakbound_cpu && t->t_weakbound_cpu != cp) {
    553 		if (!panicstr)
    554 			panic("thread_nomigrate: binding to %p but already "
    555 			    "bound to %p", (void *)cp,
    556 			    (void *)t->t_weakbound_cpu);
    557 	}
    558 
    559 	/*
    560 	 * At this point we have preemption disabled and we don't yet hold
    561 	 * the thread lock.  So it's possible that somebody else could
    562 	 * set t_bind_cpu here and not be able to force us across to the
    563 	 * new cpu (since we have preemption disabled).
    564 	 */
    565 	thread_lock(curthread);
    566 
    567 	/*
    568 	 * If further weak bindings are being (temporarily) suppressed then
    569 	 * we'll settle for disabling kernel preemption (which assures
    570 	 * no migration provided the thread does not block which it is
    571 	 * not allowed to if using thread_nomigrate).  We must remember
    572 	 * this disposition so we can take appropriate action in
    573 	 * thread_allowmigrate.  If this is a nested call and the
    574 	 * thread is already weakbound then fall through as normal.
    575 	 * We remember the decision to settle for kpreempt_disable through
    576 	 * negative nesting counting in t_nomigrate.  Once a thread has had one
    577 	 * weakbinding request satisfied in this way any further (nested)
    578 	 * requests will continue to be satisfied in the same way,
    579 	 * even if weak bindings have recommenced.
    580 	 *
        	 * Precedence note: && binds tighter than ||, so the test below is
        	 * (t_nomigrate < 0) || (weakbindingbarrier && t_nomigrate == 0).
        	 */
    581 	if (t->t_nomigrate < 0 || weakbindingbarrier && t->t_nomigrate == 0) {
    582 		--t->t_nomigrate;
    583 		thread_unlock(curthread);
    584 		return;		/* with kpreempt_disable still active */
    585 	}
    586 
    587 	/*
    588 	 * We hold thread_lock so t_bind_cpu cannot change.  We could,
    589 	 * however, be running on a different cpu to which we are t_bound_cpu
    590 	 * to (as explained above).  If we grant the weak binding request
    591 	 * in that case then the dispatcher must favour our weak binding
    592 	 * over our strong (in which case, just as when preemption is
    593 	 * disabled, we can continue to run on a cpu other than the one to
    594 	 * which we are strongbound; the difference in this case is that
    595 	 * this thread can be preempted and so can appear on the dispatch
    596 	 * queues of a cpu other than the one it is strongbound to).
    597 	 *
    598 	 * If the cpu we are running on does not appear to be a current
    599 	 * offline target (we check cpu_inmotion to determine this - since
    600 	 * we don't hold cpu_lock we may not see a recent store to that,
    601 	 * so it's possible that we at times can grant a weak binding to a
    602 	 * cpu that is an offline target, but that one request will not
    603 	 * prevent the offline from succeeding) then we will always grant
    604 	 * the weak binding request.  This includes the case above where
    605 	 * we grant a weakbinding not commensurate with our strong binding.
    606 	 *
    607 	 * If our cpu does appear to be an offline target then we're inclined
    608 	 * not to grant the weakbinding request just yet - we'd prefer to
    609 	 * migrate to another cpu and grant the request there.  The
    610 	 * exceptions are those cases where going through preemption code
    611 	 * will not result in us changing cpu:
    612 	 *
    613 	 *	. interrupts have already bypassed this case (see above)
    614 	 *	. we are already weakbound to this cpu (dispatcher code will
    615 	 *	  always return us to the weakbound cpu)
    616 	 *	. preemption was disabled even before we disabled it above
    617 	 *	. we are strongbound to this cpu (if we're strongbound to
    618 	 *	another and not yet running there the trip through the
    619 	 *	dispatcher will move us to the strongbound cpu and we
    620 	 *	will grant the weak binding there)
    621 	 */
    622 	if (cp != cpu_inmotion || t->t_nomigrate > 0 || t->t_preempt > 1 ||
    623 	    t->t_bound_cpu == cp) {
    624 		/*
    625 		 * Don't be tempted to store to t_weakbound_cpu only on
    626 		 * the first nested bind request - if we're interrupted
    627 		 * after the increment of t_nomigrate and before the
    628 		 * store to t_weakbound_cpu and the interrupt calls
    629 		 * thread_nomigrate then the assertion in thread_allowmigrate
    630 		 * would fail.
    631 		 */
    632 		t->t_nomigrate++;
    633 		t->t_weakbound_cpu = cp;
    634 		membar_producer();
    635 		thread_unlock(curthread);
    636 		/*
    637 		 * Now that we have dropped the thread_lock another thread
    638 		 * can set our t_weakbound_cpu, and will try to migrate us
    639 		 * to the strongbound cpu (which will not be prevented by
    640 		 * preemption being disabled since we're about to enable
    641 		 * preemption).  We have granted the weakbinding to the current
    642 		 * cpu, so again we are in the position that it is possible
    643 		 * that our weak and strong bindings differ.  Again this
    644 		 * is catered for by dispatcher code which will favour our
    645 		 * weak binding.
    646 		 */
    647 		kpreempt_enable();
    648 	} else {
    649 		/*
    650 		 * Move to another cpu before granting the request by
    651 		 * forcing this thread through preemption code.  When we
    652 		 * get to set{front,back}dq called from CL_PREEMPT()
    653 		 * cpu_choose() will be used to select a cpu to queue
    654 		 * us on - that will see cpu_inmotion and take
    655 		 * steps to avoid returning us to this cpu.
    656 		 */
    657 		cp->cpu_kprunrun = 1;
    658 		thread_unlock(curthread);
    659 		kpreempt_enable();	/* will call preempt() */
    660 		goto again;
    661 	}
    662 }
    663 
        /*
         * Undo one level of thread_nomigrate() for the calling thread.
         *
         * Returns without touching any state when running on an interrupt,
         * in an interrupt thread, or at or above DISP_LEVEL pil - the same
         * conditions under which thread_nomigrate() returns early.
         *
         * A negative t_nomigrate means the request was satisfied by disabling
         * preemption, so one level of each is undone.  Otherwise the count is
         * decremented, and when it reaches zero the weak binding is dropped.
         */
    664 void
    665 thread_allowmigrate(void)
    666 {
    667 	kthread_id_t t = curthread;
    668 
    669 	ASSERT(t->t_weakbound_cpu == CPU ||
    670 	    (t->t_nomigrate < 0 && t->t_preempt > 0) ||
    671 	    CPU_ON_INTR(CPU) || t->t_flag & T_INTR_THREAD ||
    672 	    getpil() >= DISP_LEVEL);
    673 
    674 	if (CPU_ON_INTR(CPU) || (t->t_flag & T_INTR_THREAD) ||
    675 	    getpil() >= DISP_LEVEL)
    676 		return;
    677 
    678 	if (t->t_nomigrate < 0) {
    679 		/*
    680 		 * This thread was granted "weak binding" in the
    681 		 * stronger form of kernel preemption disabling.
    682 		 * Undo a level of nesting for both t_nomigrate
    683 		 * and t_preempt.
    684 		 */
    685 		++t->t_nomigrate;
    686 		kpreempt_enable();
    687 	} else if (--t->t_nomigrate == 0) {
    688 		/*
    689 		 * Time to drop the weak binding.  We need to cater
    690 		 * for the case where we're weakbound to a different
    691 		 * cpu than that to which we're strongbound (a very
    692 		 * temporary arrangement that must only persist until
    693 		 * weak binding drops).  We don't acquire thread_lock
    694 		 * here so even as this code executes t_bound_cpu
    695 		 * may be changing.  So we disable preemption and
    696 		 * a) in the case that t_bound_cpu changes while we
    697 		 * have preemption disabled kprunrun will be set
    698 		 * asynchronously, and b) if before disabling
    699 		 * preemption we were already on a different cpu to
    700 		 * our t_bound_cpu then we set kprunrun ourselves
    701 		 * to force a trip through the dispatcher when
    702 		 * preemption is enabled.
    703 		 */
    704 		kpreempt_disable();
    705 		if (t->t_bound_cpu &&
    706 		    t->t_weakbound_cpu != t->t_bound_cpu)
    707 			CPU->cpu_kprunrun = 1;
    708 		t->t_weakbound_cpu = NULL;
    709 		membar_producer();
    710 		kpreempt_enable();
    711 	}
    712 }
    713 
    714 /*
    715  * weakbinding_stop can be used to temporarily cause weakbindings made
    716  * with thread_nomigrate to be satisfied through the stronger action of
    717  * kpreempt_disable.  weakbinding_start recommences normal weakbinding.
    718  */
    719 
    720 void
    721 weakbinding_stop(void)
    722 {
    723 	ASSERT(MUTEX_HELD(&cpu_lock));
    724 	weakbindingbarrier = 1;
    725 	membar_producer();	/* make visible before subsequent thread_lock */
    726 }
    727 
    728 void
    729 weakbinding_start(void)
    730 {
    731 	ASSERT(MUTEX_HELD(&cpu_lock));
    732 	weakbindingbarrier = 0;
    733 }
    734 
    735 void
    736 null_xcall(void)
    737 {
    738 }
    739 
    740 /*
    741  * This routine is called to place the CPUs in a safe place so that
    742  * one of them can be taken off line or placed on line.  What we are
    743  * trying to do here is prevent a thread from traversing the list
    744  * of active CPUs while we are changing it or from getting placed on
    745  * the run queue of a CPU that has just gone off line.  We do this by
    746  * creating a thread with the highest possible prio for each CPU and
    747  * having it call this routine.  The advantage of this method is that
    748  * we can eliminate all checks for CPU_ACTIVE in the disp routines.
    749  * This makes disp faster at the expense of making p_online() slower
    750  * which is a good trade off.
    751  */
        /*
         * Body of a per-CPU pause thread.
         *
         * index selects this thread's slot in safe_list[].  The thread loops
         * until that slot reads PAUSE_DIE: each pass announces PAUSE_READY,
         * posts cp_sem, spins until cp_go is set, raises spl, runs the optional
         * cpu_pause_func hook, and then calls mach_cpu_pause() on its slot.
         * Once PAUSE_DIE is seen it marks the slot PAUSE_DEAD and broadcasts
         * pause_free_cv.
         */
    752 static void
    753 cpu_pause(int index)
    754 {
    755 	int s;
    756 	struct _cpu_pause_info *cpi = &cpu_pause_info;
    757 	volatile char *safe = &safe_list[index];
    758 	long    lindex = index;	/* widened so it can be passed as a void * */
    759 
    760 	ASSERT((curthread->t_bound_cpu != NULL) || (*safe == PAUSE_DIE));
    761 
    762 	while (*safe != PAUSE_DIE) {
    763 		*safe = PAUSE_READY;
    764 		membar_enter();		/* make sure stores are flushed */
    765 		sema_v(&cpi->cp_sem);	/* signal requesting thread */
    766 
    767 		/*
    768 		 * Wait here until all pause threads are running.  That
    769 		 * indicates that it's safe to do the spl.  Until
    770 		 * cpu_pause_info.cp_go is set, we don't want to spl
    771 		 * because that might block clock interrupts needed
    772 		 * to preempt threads on other CPUs.
    773 		 */
    774 		while (cpi->cp_go == 0)
    775 			;
    776 		/*
    777 		 * Even though we are at the highest disp prio, we need
    778 		 * to block out all interrupts below LOCK_LEVEL so that
    779 		 * an intr doesn't come in, wake up a thread, and call
    780 		 * setbackdq/setfrontdq.
    781 		 */
    782 		s = splhigh();
    783 		/*
    784 		 * if cpu_pause_func() has been set then call it using
    785 		 * index as the argument, currently only used by
    786 		 * cpr_suspend_cpus().  This function is used as the
    787 		 * code to execute on the "paused" cpu's when a machine
    788 		 * comes out of a sleep state and CPU's were powered off.
    789 		 * (could also be used for hotplugging CPU's).
    790 		 */
    791 		if (cpu_pause_func != NULL)
    792 			(*cpu_pause_func)((void *)lindex);
    793 
        		/*
        		 * NOTE(review): presumably this holds the CPU until the
        		 * slot is changed (start_cpus() resets every slot to
        		 * PAUSE_IDLE) - confirm against the platform code.
        		 */
    794 		mach_cpu_pause(safe);
    795 
    796 		splx(s);
    797 		/*
    798 		 * Waiting is at an end. Switch out of cpu_pause
    799 		 * loop and resume useful work.
    800 		 */
    801 		swtch();
    802 	}
    803 
        	/*
        	 * Told to die: publish PAUSE_DEAD under pause_free_mutex and wake
        	 * anyone waiting on pause_free_cv.
        	 */
    804 	mutex_enter(&pause_free_mutex);
    805 	*safe = PAUSE_DEAD;
    806 	cv_broadcast(&pause_free_cv);
    807 	mutex_exit(&pause_free_mutex);
    808 }
    809 
    810 /*
    811  * Allow the cpus to start running again.
    812  */
    813 void
    814 start_cpus()
    815 {
    816 	int i;
    817 
    818 	ASSERT(MUTEX_HELD(&cpu_lock));
    819 	ASSERT(cpu_pause_info.cp_paused);
    820 	cpu_pause_info.cp_paused = NULL;
    821 	for (i = 0; i < NCPU; i++)
    822 		safe_list[i] = PAUSE_IDLE;
    823 	membar_enter();			/* make sure stores are flushed */
    824 	affinity_clear();
    825 	splx(cpu_pause_info.cp_spl);
    826 	kpreempt_enable();
    827 }
    828 
    829 /*
    830  * Allocate a pause thread for a CPU.
    831  */
    832 static void
    833 cpu_pause_alloc(cpu_t *cp)
    834 {
    835 	kthread_id_t	t;
    836 	long		cpun = cp->cpu_id;
    837 
    838 	/*
    839 	 * Note, v.v_nglobpris will not change value as long as I hold
    840 	 * cpu_lock.
    841 	 */
    842 	t = thread_create(NULL, 0, cpu_pause, (void *)cpun,
    843 	    0, &p0, TS_STOPPED, v.v_nglobpris - 1);
    844 	thread_lock(t);
    845 	t->t_bound_cpu = cp;
    846 	t->t_disp_queue = cp->cpu_disp;
    847 	t->t_affinitycnt = 1;
    848 	t->t_preempt = 1;
    849 	thread_unlock(t);
    850 	cp->cpu_pause_thread = t;
    851 	/*
    852 	 * Registering a thread in the callback table is usually done
    853 	 * in the initialization code of the thread.  In this
    854 	 * case, we do it right after thread creation because the
    855 	 * thread itself may never run, and we need to register the
    856 	 * fact that it is safe for cpr suspend.
    857 	 */
    858 	CALLB_CPR_INIT_SAFE(t, "cpu_pause");
    859 }
    860 
    861 /*
    862  * Free a pause thread for a CPU.
    863  */
    864 static void
    865 cpu_pause_free(cpu_t *cp)
    866 {
    867 	kthread_id_t	t;
    868 	int		cpun = cp->cpu_id;
    869 
    870 	ASSERT(MUTEX_HELD(&cpu_lock));
    871 	/*
    872 	 * We have to get the thread and tell him to die.
    873 	 */
    874 	if ((t = cp->cpu_pause_thread) == NULL) {
    875 		ASSERT(safe_list[cpun] == PAUSE_IDLE);
    876 		return;
    877 	}
    878 	thread_lock(t);
    879 	t->t_cpu = CPU;		/* disp gets upset if last cpu is quiesced. */
    880 	t->t_bound_cpu = NULL;	/* Must un-bind; cpu may not be running. */
    881 	t->t_pri = v.v_nglobpris - 1;
    882 	ASSERT(safe_list[cpun] == PAUSE_IDLE);
    883 	safe_list[cpun] = PAUSE_DIE;
    884 	THREAD_TRANSITION(t);
    885 	setbackdq(t);
    886 	thread_unlock_nopreempt(t);
    887 
    888 	/*
    889 	 * If we don't wait for the thread to actually die, it may try to
    890 	 * run on the wrong cpu as part of an actual call to pause_cpus().
    891 	 */
    892 	mutex_enter(&pause_free_mutex);
    893 	while (safe_list[cpun] != PAUSE_DEAD) {
    894 		cv_wait(&pause_free_cv, &pause_free_mutex);
    895 	}
    896 	mutex_exit(&pause_free_mutex);
    897 	safe_list[cpun] = PAUSE_IDLE;
    898 
    899 	cp->cpu_pause_thread = NULL;
    900 }
    901 
    902 /*
    903  * Initialize basic structures for pausing CPUs.
    904  */
    905 void
    906 cpu_pause_init()
    907 {
    908 	sema_init(&cpu_pause_info.cp_sem, 0, NULL, SEMA_DEFAULT, NULL);
    909 	/*
    910 	 * Create initial CPU pause thread.
    911 	 */
    912 	cpu_pause_alloc(CPU);
    913 }
    914 
    915 /*
    916  * Start the threads used to pause another CPU.
    917  */
    918 static int
    919 cpu_pause_start(processorid_t cpu_id)
    920 {
    921 	int	i;
    922 	int	cpu_count = 0;
    923 
    924 	for (i = 0; i < NCPU; i++) {
    925 		cpu_t		*cp;
    926 		kthread_id_t	t;
    927 
    928 		cp = cpu[i];
    929 		if (!CPU_IN_SET(cpu_available, i) || (i == cpu_id)) {
    930 			safe_list[i] = PAUSE_WAIT;
    931 			continue;
    932 		}
    933 
    934 		/*
    935 		 * Skip CPU if it is quiesced or not yet started.
    936 		 */
    937 		if ((cp->cpu_flags & (CPU_QUIESCED | CPU_READY)) != CPU_READY) {
    938 			safe_list[i] = PAUSE_WAIT;
    939 			continue;
    940 		}
    941 
    942 		/*
    943 		 * Start this CPU's pause thread.
    944 		 */
    945 		t = cp->cpu_pause_thread;
    946 		thread_lock(t);
    947 		/*
    948 		 * Reset the priority, since nglobpris may have
    949 		 * changed since the thread was created, if someone
    950 		 * has loaded the RT (or some other) scheduling
    951 		 * class.
    952 		 */
    953 		t->t_pri = v.v_nglobpris - 1;
    954 		THREAD_TRANSITION(t);
    955 		setbackdq(t);
    956 		thread_unlock_nopreempt(t);
    957 		++cpu_count;
    958 	}
    959 	return (cpu_count);
    960 }
    961 
    962 
    963 /*
    964  * Pause all of the CPUs except the one we are on by creating a high
    965  * priority thread bound to those CPUs.
    966  *
    967  * Note that one must be extremely careful regarding code
    968  * executed while CPUs are paused.  Since a CPU may be paused
    969  * while a thread scheduling on that CPU is holding an adaptive
    970  * lock, code executed with CPUs paused must not acquire adaptive
    971  * (or low-level spin) locks.  Also, such code must not block,
    972  * since the thread that is supposed to initiate the wakeup may
    973  * never run.
    974  *
    975  * With a few exceptions, the restrictions on code executed with CPUs
    976  * paused match those for code executed at high-level interrupt
    977  * context.
    978  */
    979 void
    980 pause_cpus(cpu_t *off_cp)
    981 {
    982 	processorid_t	cpu_id;
    983 	int		i;
    984 	struct _cpu_pause_info	*cpi = &cpu_pause_info;
    985 
    986 	ASSERT(MUTEX_HELD(&cpu_lock));
    987 	ASSERT(cpi->cp_paused == NULL);
    988 	cpi->cp_count = 0;
    989 	cpi->cp_go = 0;
    990 	for (i = 0; i < NCPU; i++)
    991 		safe_list[i] = PAUSE_IDLE;
    992 	kpreempt_disable();
    993 
    994 	/*
    995 	 * If running on the cpu that is going offline, get off it.
    996 	 * This is so that it won't be necessary to rechoose a CPU
    997 	 * when done.
    998 	 */
    999 	if (CPU == off_cp)
   1000 		cpu_id = off_cp->cpu_next_part->cpu_id;
   1001 	else
   1002 		cpu_id = CPU->cpu_id;
   1003 	affinity_set(cpu_id);
   1004 
   1005 	/*
   1006 	 * Start the pause threads and record how many were started
   1007 	 */
   1008 	cpi->cp_count = cpu_pause_start(cpu_id);
   1009 
   1010 	/*
   1011 	 * Now wait for all CPUs to be running the pause thread.
   1012 	 */
   1013 	while (cpi->cp_count > 0) {
   1014 		/*
   1015 		 * Spin reading the count without grabbing the disp
   1016 		 * lock to make sure we don't prevent the pause
   1017 		 * threads from getting the lock.
   1018 		 */
   1019 		while (sema_held(&cpi->cp_sem))
   1020 			;
   1021 		if (sema_tryp(&cpi->cp_sem))
   1022 			--cpi->cp_count;
   1023 	}
   1024 	cpi->cp_go = 1;			/* all have reached cpu_pause */
   1025 
   1026 	/*
   1027 	 * Now wait for all CPUs to spl. (Transition from PAUSE_READY
   1028 	 * to PAUSE_WAIT.)
   1029 	 */
   1030 	for (i = 0; i < NCPU; i++) {
   1031 		while (safe_list[i] != PAUSE_WAIT)
   1032 			;
   1033 	}
   1034 	cpi->cp_spl = splhigh();	/* block dispatcher on this CPU */
   1035 	cpi->cp_paused = curthread;
   1036 }
   1037 
   1038 /*
   1039  * Check whether the current thread has CPUs paused
   1040  */
   1041 int
   1042 cpus_paused(void)
   1043 {
   1044 	if (cpu_pause_info.cp_paused != NULL) {
   1045 		ASSERT(cpu_pause_info.cp_paused == curthread);
   1046 		return (1);
   1047 	}
   1048 	return (0);
   1049 }
   1050 
   1051 static cpu_t *
   1052 cpu_get_all(processorid_t cpun)
   1053 {
   1054 	ASSERT(MUTEX_HELD(&cpu_lock));
   1055 
   1056 	if (cpun >= NCPU || cpun < 0 || !CPU_IN_SET(cpu_available, cpun))
   1057 		return (NULL);
   1058 	return (cpu[cpun]);
   1059 }
   1060 
   1061 /*
   1062  * Check whether cpun is a valid processor id and whether it should be
   1063  * visible from the current zone. If it is, return a pointer to the
   1064  * associated CPU structure.
   1065  */
   1066 cpu_t *
   1067 cpu_get(processorid_t cpun)
   1068 {
   1069 	cpu_t *c;
   1070 
   1071 	ASSERT(MUTEX_HELD(&cpu_lock));
   1072 	c = cpu_get_all(cpun);
   1073 	if (c != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
   1074 	    zone_pset_get(curproc->p_zone) != cpupart_query_cpu(c))
   1075 		return (NULL);
   1076 	return (c);
   1077 }
   1078 
   1079 /*
   1080  * The following functions should be used to check CPU states in the kernel.
   1081  * They should be invoked with cpu_lock held.  Kernel subsystems interested
   1082  * in CPU states should *not* use cpu_get_state() and various P_ONLINE/etc
   1083  * states.  Those are for user-land (and system call) use only.
   1084  */
   1085 
   1086 /*
   1087  * Determine whether the CPU is online and handling interrupts.
   1088  */
   1089 int
   1090 cpu_is_online(cpu_t *cpu)
   1091 {
   1092 	ASSERT(MUTEX_HELD(&cpu_lock));
   1093 	return (cpu_flagged_online(cpu->cpu_flags));
   1094 }
   1095 
   1096 /*
   1097  * Determine whether the CPU is offline (this includes spare and faulted).
   1098  */
   1099 int
   1100 cpu_is_offline(cpu_t *cpu)
   1101 {
   1102 	ASSERT(MUTEX_HELD(&cpu_lock));
   1103 	return (cpu_flagged_offline(cpu->cpu_flags));
   1104 }
   1105 
   1106 /*
   1107  * Determine whether the CPU is powered off.
   1108  */
   1109 int
   1110 cpu_is_poweredoff(cpu_t *cpu)
   1111 {
   1112 	ASSERT(MUTEX_HELD(&cpu_lock));
   1113 	return (cpu_flagged_poweredoff(cpu->cpu_flags));
   1114 }
   1115 
   1116 /*
   1117  * Determine whether the CPU is handling interrupts.
   1118  */
   1119 int
   1120 cpu_is_nointr(cpu_t *cpu)
   1121 {
   1122 	ASSERT(MUTEX_HELD(&cpu_lock));
   1123 	return (cpu_flagged_nointr(cpu->cpu_flags));
   1124 }
   1125 
   1126 /*
   1127  * Determine whether the CPU is active (scheduling threads).
   1128  */
   1129 int
   1130 cpu_is_active(cpu_t *cpu)
   1131 {
   1132 	ASSERT(MUTEX_HELD(&cpu_lock));
   1133 	return (cpu_flagged_active(cpu->cpu_flags));
   1134 }
   1135 
   1136 /*
   1137  * Same as above, but these require cpu_flags instead of cpu_t pointers.
   1138  */
   1139 int
   1140 cpu_flagged_online(cpu_flag_t cpu_flags)
   1141 {
   1142 	return (cpu_flagged_active(cpu_flags) &&
   1143 	    (cpu_flags & CPU_ENABLE));
   1144 }
   1145 
   1146 int
   1147 cpu_flagged_offline(cpu_flag_t cpu_flags)
   1148 {
   1149 	return (((cpu_flags & CPU_POWEROFF) == 0) &&
   1150 	    ((cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY));
   1151 }
   1152 
   1153 int
   1154 cpu_flagged_poweredoff(cpu_flag_t cpu_flags)
   1155 {
   1156 	return ((cpu_flags & CPU_POWEROFF) == CPU_POWEROFF);
   1157 }
   1158 
   1159 int
   1160 cpu_flagged_nointr(cpu_flag_t cpu_flags)
   1161 {
   1162 	return (cpu_flagged_active(cpu_flags) &&
   1163 	    (cpu_flags & CPU_ENABLE) == 0);
   1164 }
   1165 
   1166 int
   1167 cpu_flagged_active(cpu_flag_t cpu_flags)
   1168 {
   1169 	return (((cpu_flags & (CPU_POWEROFF | CPU_FAULTED | CPU_SPARE)) == 0) &&
   1170 	    ((cpu_flags & (CPU_READY | CPU_OFFLINE)) == CPU_READY));
   1171 }
   1172 
   1173 /*
   1174  * Bring the indicated CPU online.
   1175  */
   1176 int
   1177 cpu_online(cpu_t *cp)
   1178 {
   1179 	int	error = 0;
   1180 
   1181 	/*
   1182 	 * Handle on-line request.
   1183 	 *	This code must put the new CPU on the active list before
   1184 	 *	starting it because it will not be paused, and will start
   1185 	 * 	using the active list immediately.  The real start occurs
   1186 	 *	when the CPU_QUIESCED flag is turned off.
   1187 	 */
   1188 
   1189 	ASSERT(MUTEX_HELD(&cpu_lock));
   1190 
   1191 	/*
   1192 	 * Put all the cpus into a known safe place.
   1193 	 * No mutexes can be entered while CPUs are paused.
   1194 	 */
   1195 	error = mp_cpu_start(cp);	/* arch-dep hook */
   1196 	if (error == 0) {
   1197 		pg_cpupart_in(cp, cp->cpu_part);
   1198 		pause_cpus(NULL);
   1199 		cpu_add_active_internal(cp);
   1200 		if (cp->cpu_flags & CPU_FAULTED) {
   1201 			cp->cpu_flags &= ~CPU_FAULTED;
   1202 			mp_cpu_faulted_exit(cp);
   1203 		}
   1204 		cp->cpu_flags &= ~(CPU_QUIESCED | CPU_OFFLINE | CPU_FROZEN |
   1205 		    CPU_SPARE);
   1206 		start_cpus();
   1207 		cpu_stats_kstat_create(cp);
   1208 		cpu_create_intrstat(cp);
   1209 		lgrp_kstat_create(cp);
   1210 		cpu_state_change_notify(cp->cpu_id, CPU_ON);
   1211 		cpu_intr_enable(cp);	/* arch-dep hook */
   1212 		cpu_set_state(cp);
   1213 		cyclic_online(cp);
   1214 		/*
   1215 		 * This has to be called only after cyclic_online(). This
   1216 		 * function uses cyclics.
   1217 		 */
   1218 		callout_cpu_online(cp);
   1219 		poke_cpu(cp->cpu_id);
   1220 	}
   1221 
   1222 	return (error);
   1223 }
   1224 
   1225 /*
   1226  * Take the indicated CPU offline.
         *
         * cp is the CPU to take offline.  If CPU_FORCED is set in flags,
         * cpu_unbind() is asked to unbind every thread rather than only the
         * soft-bound ones.  The caller must hold cpu_lock.
         *
         * Returns 0 on success.  Returns EBUSY when the CPU may not be taken
         * offline (see the checks below), when the calling thread is itself
         * bound to cp, when bound threads fail to drain, or when
         * cyclic_offline() fails.  Errors from cpu_unbind() and mp_cpu_stop()
         * are returned as they are.
         *
         * On failure after the CPU_OFF notification has been sent, the code at
         * "out" undoes the steps already taken (interrupts, cyclics, callouts)
         * and sends a CPU_ON notification.
   1227  */
   1228 int
   1229 cpu_offline(cpu_t *cp, int flags)
   1230 {
   1231 	cpupart_t *pp;
   1232 	int	error = 0;
   1233 	cpu_t	*ncp;
   1234 	int	intr_enable;
   1235 	int	cyclic_off = 0;
   1236 	int	callout_off = 0;
   1237 	int	loop_count;
   1238 	int	no_quiesce = 0;
   1239 	int	(*bound_func)(struct cpu *, int);
   1240 	kthread_t *t;
   1241 	lpl_t	*cpu_lpl;
   1242 	proc_t	*p;
   1243 	int	lgrp_diff_lpl;
   1244 	boolean_t unbind_all_threads = (flags & CPU_FORCED) != 0;
   1245 
   1246 	ASSERT(MUTEX_HELD(&cpu_lock));
   1247 
   1248 	/*
   1249 	 * If we're going from faulted or spare to offline, just
   1250 	 * clear these flags and update CPU state.
   1251 	 */
   1252 	if (cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) {
   1253 		if (cp->cpu_flags & CPU_FAULTED) {
   1254 			cp->cpu_flags &= ~CPU_FAULTED;
   1255 			mp_cpu_faulted_exit(cp);
   1256 		}
   1257 		cp->cpu_flags &= ~CPU_SPARE;
   1258 		cpu_set_state(cp);
   1259 		return (0);
   1260 	}
   1261 
   1262 	/*
   1263 	 * Handle off-line request.
   1264 	 */
   1265 	pp = cp->cpu_part;
   1266 	/*
   1267 	 * Don't offline last online CPU in partition
        	 *
        	 * NOTE(review): the cpu_intr_count(cp) < 2 test presumably refuses
        	 * to remove the last CPU that can take interrupts -- confirm
        	 * against cpu_intr_count().
   1268 	 */
   1269 	if (ncpus_online <= 1 || pp->cp_ncpus <= 1 || cpu_intr_count(cp) < 2)
   1270 		return (EBUSY);
   1271 	/*
   1272 	 * Unbind all soft-bound threads bound to our CPU and hard bound threads
   1273 	 * if we were asked to.
   1274 	 */
   1275 	error = cpu_unbind(cp->cpu_id, unbind_all_threads);
   1276 	if (error != 0)
   1277 		return (error);
   1278 	/*
   1279 	 * We shouldn't be bound to this CPU ourselves.
   1280 	 */
   1281 	if (curthread->t_bound_cpu == cp)
   1282 		return (EBUSY);
   1283 
        	/*
        	 * Every return above happens before anything has been changed for
        	 * the online CPU.  From here on, failures leave through "out" so
        	 * that the steps taken below are undone.
        	 */
   1284 	/*
   1285 	 * Tell interested parties that this CPU is going offline.
   1286 	 */
   1287 	cpu_state_change_notify(cp->cpu_id, CPU_OFF);
   1288 
   1289 	/*
   1290 	 * Tell the PG subsystem that the CPU is leaving the partition
   1291 	 */
   1292 	pg_cpupart_out(cp, pp);
   1293 
   1294 	/*
   1295 	 * Take the CPU out of interrupt participation so we won't find
   1296 	 * bound kernel threads.  If the architecture cannot completely
   1297 	 * shut off interrupts on the CPU, don't quiesce it, but don't
   1298 	 * run anything but interrupt thread... this is indicated by
   1299 	 * the CPU_OFFLINE flag being on but the CPU_QUIESCE flag being
   1300 	 * off.
   1301 	 */
   1302 	intr_enable = cp->cpu_flags & CPU_ENABLE;
   1303 	if (intr_enable)
   1304 		no_quiesce = cpu_intr_disable(cp);
   1305 
   1306 	/*
   1307 	 * Record that we are aiming to offline this cpu.  This acts as
   1308 	 * a barrier to further weak binding requests in thread_nomigrate
   1309 	 * and also causes cpu_choose, disp_lowpri_cpu and setfrontdq to
   1310 	 * lean away from this cpu.  Further strong bindings are already
   1311 	 * avoided since we hold cpu_lock.  Since threads that are set
   1312 	 * runnable around now and others coming off the target cpu are
   1313 	 * directed away from the target, existing strong and weak bindings
   1314 	 * (especially the latter) to the target cpu stand maximum chance of
   1315 	 * being able to unbind during the short delay loop below (if other
   1316 	 * unbound threads compete they may not see cpu in time to unbind
   1317 	 * even if they would do so immediately).
   1318 	 */
   1319 	cpu_inmotion = cp;
   1320 	membar_enter();
   1321 
   1322 	/*
   1323 	 * Check for kernel threads (strong or weak) bound to that CPU.
   1324 	 * Strongly bound threads may not unbind, and we'll have to return
   1325 	 * EBUSY.  Weakly bound threads should always disappear - we've
   1326 	 * stopped more weak binding with cpu_inmotion and existing
   1327 	 * bindings will drain imminently (they may not block).  Nonetheless
   1328 	 * we will wait for a fixed period for all bound threads to disappear.
   1329 	 * Inactive interrupt threads are OK (they'll be in TS_FREE
   1330 	 * state).  If test finds some bound threads, wait a few ticks
   1331 	 * to give short-lived threads (such as interrupts) chance to
   1332 	 * complete.  Note that if no_quiesce is set, i.e. this cpu
   1333 	 * is required to service interrupts, then we take the route
   1334 	 * that permits interrupt threads to be active (or bypassed).
   1335 	 */
   1336 	bound_func = no_quiesce ? disp_bound_threads : disp_bound_anythreads;
   1337 
        	/*
        	 * "again" is also the retry point used further down when bound
        	 * threads or active interrupts are found after the CPUs have been
        	 * paused.  The callout_off and cyclic_off flags keep a retry from
        	 * repeating those offline steps.
        	 */
   1338 again:	for (loop_count = 0; (*bound_func)(cp, 0); loop_count++) {
   1339 		if (loop_count >= 5) {
   1340 			error = EBUSY;	/* some threads still bound */
   1341 			break;
   1342 		}
   1343 
   1344 		/*
   1345 		 * If some threads were assigned, give them
   1346 		 * a chance to complete or move.
   1347 		 *
   1348 		 * This assumes that the clock_thread is not bound
   1349 		 * to any CPU, because the clock_thread is needed to
   1350 		 * do the delay(hz/100).
   1351 		 *
   1352 		 * Note: we still hold the cpu_lock while waiting for
   1353 		 * the next clock tick.  This is OK since it isn't
   1354 		 * needed for anything else except processor_bind(2),
   1355 		 * and system initialization.  If we drop the lock,
   1356 		 * we would risk another p_online disabling the last
   1357 		 * processor.
   1358 		 */
   1359 		delay(hz/100);
   1360 	}
   1361 
   1362 	if (error == 0 && callout_off == 0) {
   1363 		callout_cpu_offline(cp);
   1364 		callout_off = 1;
   1365 	}
   1366 
   1367 	if (error == 0 && cyclic_off == 0) {
   1368 		if (!cyclic_offline(cp)) {
   1369 			/*
   1370 			 * We must have bound cyclics...
   1371 			 */
   1372 			error = EBUSY;
   1373 			goto out;
   1374 		}
   1375 		cyclic_off = 1;
   1376 	}
   1377 
   1378 	/*
   1379 	 * Call mp_cpu_stop() to perform any special operations
   1380 	 * needed for this machine architecture to offline a CPU.
   1381 	 */
   1382 	if (error == 0)
   1383 		error = mp_cpu_stop(cp);	/* arch-dep hook */
   1384 
   1385 	/*
   1386 	 * If that all worked, take the CPU offline and decrement
   1387 	 * ncpus_online.
   1388 	 */
   1389 	if (error == 0) {
   1390 		/*
   1391 		 * Put all the cpus into a known safe place.
   1392 		 * No mutexes can be entered while CPUs are paused.
   1393 		 */
   1394 		pause_cpus(cp);
   1395 		/*
   1396 		 * Repeat the operation, if necessary, to make sure that
   1397 		 * all outstanding low-level interrupts run to completion
   1398 		 * before we set the CPU_QUIESCED flag.  It's also possible
   1399 		 * that a thread has weak bound to the cpu despite our raising
   1400 		 * cpu_inmotion above since it may have loaded that
   1401 		 * value before the barrier became visible (this would have
   1402 		 * to be the thread that was on the target cpu at the time
   1403 		 * we raised the barrier).
        		 *
        		 * On a retry the CPUs are restarted first.  The
        		 * mp_cpu_start() call presumably reverses the mp_cpu_stop()
        		 * above (its return value is ignored) -- confirm.
   1404 		 */
   1405 		if ((!no_quiesce && cp->cpu_intr_actv != 0) ||
   1406 		    (*bound_func)(cp, 1)) {
   1407 			start_cpus();
   1408 			(void) mp_cpu_start(cp);
   1409 			goto again;
   1410 		}
        		/*
        		 * Capture the next CPU in the partition and cp's lpl before
        		 * cp is removed from the active list.
        		 */
   1411 		ncp = cp->cpu_next_part;
   1412 		cpu_lpl = cp->cpu_lpl;
   1413 		ASSERT(cpu_lpl != NULL);
   1414 
   1415 		/*
   1416 		 * Remove the CPU from the list of active CPUs.
   1417 		 */
   1418 		cpu_remove_active(cp);
   1419 
   1420 		/*
   1421 		 * Walk the active process list and look for threads
   1422 		 * whose home lgroup needs to be updated, or
   1423 		 * the last CPU they run on is the one being offlined now.
   1424 		 */
   1425 
   1426 		ASSERT(curthread->t_cpu != cp);
   1427 		for (p = practive; p != NULL; p = p->p_next) {
   1428 
   1429 			t = p->p_tlist;
   1430 
   1431 			if (t == NULL)
   1432 				continue;
   1433 
        			/*
        			 * Per process count of threads that are in cp's
        			 * lgroup but have a different lpl.
        			 */
   1434 			lgrp_diff_lpl = 0;
   1435 
   1436 			do {
   1437 				ASSERT(t->t_lpl != NULL);
   1438 				/*
   1439 				 * Taking last CPU in lpl offline
   1440 				 * Rehome thread if it is in this lpl
   1441 				 * Otherwise, update the count of how many
   1442 				 * threads are in this CPU's lgroup but have
   1443 				 * a different lpl.
   1444 				 */
   1445 
   1446 				if (cpu_lpl->lpl_ncpu == 0) {
   1447 					if (t->t_lpl == cpu_lpl)
   1448 						lgrp_move_thread(t,
   1449 						    lgrp_choose(t,
   1450 						    t->t_cpupart), 0);
   1451 					else if (t->t_lpl->lpl_lgrpid ==
   1452 					    cpu_lpl->lpl_lgrpid)
   1453 						lgrp_diff_lpl++;
   1454 				}
   1455 				ASSERT(t->t_lpl->lpl_ncpu > 0);
   1456 
   1457 				/*
   1458 				 * Update CPU last ran on if it was this CPU
   1459 				 */
   1460 				if (t->t_cpu == cp && t->t_bound_cpu != cp)
   1461 					t->t_cpu = disp_lowpri_cpu(ncp,
   1462 					    t->t_lpl, t->t_pri, NULL);
   1463 				ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp ||
   1464 				    t->t_weakbound_cpu == cp);
   1465 
   1466 				t = t->t_forw;
   1467 			} while (t != p->p_tlist);
   1468 
   1469 			/*
   1470 			 * Didn't find any threads in the same lgroup as this
   1471 			 * CPU with a different lpl, so remove the lgroup from
   1472 			 * the process lgroup bitmask.
   1473 			 */
   1474 
   1475 			if (lgrp_diff_lpl == 0)
   1476 				klgrpset_del(p->p_lgrpset, cpu_lpl->lpl_lgrpid);
   1477 		}
   1478 
   1479 		/*
   1480 		 * Walk thread list looking for threads that need to be
   1481 		 * rehomed, since there are some threads that are not in
   1482 		 * their process's p_tlist.
   1483 		 */
   1484 
   1485 		t = curthread;
   1486 		do {
   1487 			ASSERT(t != NULL && t->t_lpl != NULL);
   1488 
   1489 			/*
   1490 			 * Rehome threads with same lpl as this CPU when this
   1491 			 * is the last CPU in the lpl.
   1492 			 */
   1493 
   1494 			if ((cpu_lpl->lpl_ncpu == 0) && (t->t_lpl == cpu_lpl))
   1495 				lgrp_move_thread(t,
   1496 				    lgrp_choose(t, t->t_cpupart), 1);
   1497 
   1498 			ASSERT(t->t_lpl->lpl_ncpu > 0);
   1499 
   1500 			/*
   1501 			 * Update CPU last ran on if it was this CPU
   1502 			 */
   1503 
   1504 			if (t->t_cpu == cp && t->t_bound_cpu != cp) {
   1505 				t->t_cpu = disp_lowpri_cpu(ncp,
   1506 				    t->t_lpl, t->t_pri, NULL);
   1507 			}
   1508 			ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp ||
   1509 			    t->t_weakbound_cpu == cp);
   1510 			t = t->t_next;
   1511 
   1512 		} while (t != curthread);
        		/*
        		 * Still with CPUs paused: mark cp offline (and quiesced,
        		 * unless it has to keep servicing interrupts), update the
        		 * online count and clear cpu_inmotion.  Only then are the
        		 * CPUs restarted.
        		 */
   1513 		ASSERT((cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) == 0);
   1514 		cp->cpu_flags |= CPU_OFFLINE;
   1515 		disp_cpu_inactive(cp);
   1516 		if (!no_quiesce)
   1517 			cp->cpu_flags |= CPU_QUIESCED;
   1518 		ncpus_online--;
   1519 		cpu_set_state(cp);
   1520 		cpu_inmotion = NULL;
   1521 		start_cpus();
   1522 		cpu_stats_kstat_destroy(cp);
   1523 		cpu_delete_intrstat(cp);
   1524 		lgrp_kstat_destroy(cp);
   1525 	}
   1526 
        	/*
        	 * Common exit, reached on success as well as on failure.  On
        	 * success cpu_inmotion has already been cleared above, so the
        	 * store below only matters for the failure paths.
        	 */
   1527 out:
   1528 	cpu_inmotion = NULL;
   1529 
   1530 	/*
   1531 	 * If we failed, re-enable interrupts.
   1532 	 * Do this even if cpu_intr_disable returned an error, because
   1533 	 * it may have partially disabled interrupts.
   1534 	 */
   1535 	if (error && intr_enable)
   1536 		cpu_intr_enable(cp);
   1537 
   1538 	/*
   1539 	 * If we failed, but managed to offline the cyclic subsystem on this
   1540 	 * CPU, bring it back online.
   1541 	 */
   1542 	if (error && cyclic_off)
   1543 		cyclic_online(cp);
   1544 
   1545 	/*
   1546 	 * If we failed, but managed to offline callouts on this CPU,
   1547 	 * bring it back online.
   1548 	 */
   1549 	if (error && callout_off)
   1550 		callout_cpu_online(cp);
   1551 
   1552 	/*
   1553 	 * If we failed, tell the PG subsystem that the CPU is back
        	 *
        	 * NOTE(review): unlike the other recovery steps here, this call
        	 * is not guarded by "error", so it also runs after a successful
        	 * offline.  Confirm whether that is intended or whether it
        	 * should be conditional like its neighbours.
   1554 	 */
   1555 	pg_cpupart_in(cp, pp);
   1556 
   1557 	/*
   1558 	 * If we failed, we need to notify everyone that this CPU is back on.
   1559 	 */
   1560 	if (error != 0)
   1561 		cpu_state_change_notify(cp->cpu_id, CPU_ON);
   1562 
   1563 	return (error);
   1564 }
   1565 
   1566 /*
   1567  * Mark the indicated CPU as faulted, taking it offline.
   1568  */
   1569 int
   1570 cpu_faulted(cpu_t *cp, int flags)
   1571 {
   1572 	int	error = 0;
   1573 
   1574 	ASSERT(MUTEX_HELD(&cpu_lock));
   1575 	ASSERT(!cpu_is_poweredoff(cp));
   1576 
   1577 	if (cpu_is_offline(cp)) {
   1578 		cp->cpu_flags &= ~CPU_SPARE;
   1579 		cp->cpu_flags |= CPU_FAULTED;
   1580 		mp_cpu_faulted_enter(cp);
   1581 		cpu_set_state(cp);
   1582 		return (0);
   1583 	}
   1584 
   1585 	if ((error = cpu_offline(cp, flags)) == 0) {
   1586 		cp->cpu_flags |= CPU_FAULTED;
   1587 		mp_cpu_faulted_enter(cp);
   1588 		cpu_set_state(cp);
   1589 	}
   1590 
   1591 	return (error);
   1592 }
   1593 
   1594 /*
   1595  * Mark the indicated CPU as a spare, taking it offline.
   1596  */
   1597 int
   1598 cpu_spare(cpu_t *cp, int flags)
   1599 {
   1600 	int	error = 0;
   1601 
   1602 	ASSERT(MUTEX_HELD(&cpu_lock));
   1603 	ASSERT(!cpu_is_poweredoff(cp));
   1604 
   1605 	if (cpu_is_offline(cp)) {
   1606 		if (cp->cpu_flags & CPU_FAULTED) {
   1607 			cp->cpu_flags &= ~CPU_FAULTED;
   1608 			mp_cpu_faulted_exit(cp);
   1609 		}
   1610 		cp->cpu_flags |= CPU_SPARE;
   1611 		cpu_set_state(cp);
   1612 		return (0);
   1613 	}
   1614 
   1615 	if ((error = cpu_offline(cp, flags)) == 0) {
   1616 		cp->cpu_flags |= CPU_SPARE;
   1617 		cpu_set_state(cp);
   1618 	}
   1619 
   1620 	return (error);
   1621 }
   1622 
   1623 /*
   1624  * Take the indicated CPU from poweroff to offline.
   1625  */
   1626 int
   1627 cpu_poweron(cpu_t *cp)
   1628 {
   1629 	int	error = ENOTSUP;
   1630 
   1631 	ASSERT(MUTEX_HELD(&cpu_lock));
   1632 	ASSERT(cpu_is_poweredoff(cp));
   1633 
   1634 	error = mp_cpu_poweron(cp);	/* arch-dep hook */
   1635 	if (error == 0)
   1636 		cpu_set_state(cp);
   1637 
   1638 	return (error);
   1639 }
   1640 
   1641 /*
   1642  * Take the indicated CPU from any inactive state to powered off.
   1643  */
   1644 int
   1645 cpu_poweroff(cpu_t *cp)
   1646 {
   1647 	int	error = ENOTSUP;
   1648 
   1649 	ASSERT(MUTEX_HELD(&cpu_lock));
   1650 	ASSERT(cpu_is_offline(cp));
   1651 
   1652 	if (!(cp->cpu_flags & CPU_QUIESCED))
   1653 		return (EBUSY);		/* not completely idle */
   1654 
   1655 	error = mp_cpu_poweroff(cp);	/* arch-dep hook */
   1656 	if (error == 0)
   1657 		cpu_set_state(cp);
   1658 
   1659 	return (error);
   1660 }
   1661 
   1662 /*
   1663  * Initialize the Sequential CPU id lookup table
   1664  */
   1665 void
   1666 cpu_seq_tbl_init()
   1667 {
   1668 	cpu_t	**tbl;
   1669 
   1670 	tbl = kmem_zalloc(sizeof (struct cpu *) * max_ncpus, KM_SLEEP);
   1671 	tbl[0] = CPU;
   1672 
   1673 	cpu_seq = tbl;
   1674 }
   1675 
   1676 /*
   1677  * Initialize the CPU lists for the first CPU.
   1678  */
   1679 void
   1680 cpu_list_init(cpu_t *cp)
   1681 {
   1682 	cp->cpu_next = cp;
   1683 	cp->cpu_prev = cp;
   1684 	cpu_list = cp;
   1685 	clock_cpu_list = cp;
   1686 
   1687 	cp->cpu_next_onln = cp;
   1688 	cp->cpu_prev_onln = cp;
   1689 	cpu_active = cp;
   1690 
   1691 	cp->cpu_seqid = 0;
   1692 	CPUSET_ADD(cpu_seqid_inuse, 0);
   1693 
   1694 	/*
   1695 	 * Bootstrap cpu_seq using cpu_list
   1696 	 * The cpu_seq[] table will be dynamically allocated
   1697 	 * when kmem later becomes available (but before going MP)
   1698 	 */
   1699 	cpu_seq = &cpu_list;
   1700 
   1701 	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
   1702 	cp_default.cp_cpulist = cp;
   1703 	cp_default.cp_ncpus = 1;
   1704 	cp->cpu_next_part = cp;
   1705 	cp->cpu_prev_part = cp;
   1706 	cp->cpu_part = &cp_default;
   1707 
   1708 	CPUSET_ADD(cpu_available, cp->cpu_id);
   1709 }
   1710 
   1711 /*
   1712  * Insert a CPU into the list of available CPUs.
   1713  */
   1714 void
   1715 cpu_add_unit(cpu_t *cp)
   1716 {
   1717 	int seqid;
   1718 
   1719 	ASSERT(MUTEX_HELD(&cpu_lock));
   1720 	ASSERT(cpu_list != NULL);	/* list started in cpu_list_init */
   1721 
   1722 	lgrp_config(LGRP_CONFIG_CPU_ADD, (uintptr_t)cp, 0);
   1723 
   1724 	/*
   1725 	 * Note: most users of the cpu_list will grab the
   1726 	 * cpu_lock to insure that it isn't modified.  However,
   1727 	 * certain users can't or won't do that.  To allow this
   1728 	 * we pause the other cpus.  Users who walk the list
   1729 	 * without cpu_lock, must disable kernel preemption
   1730 	 * to insure that the list isn't modified underneath
   1731 	 * them.  Also, any cached pointers to cpu structures
   1732 	 * must be revalidated by checking to see if the
   1733 	 * cpu_next pointer points to itself.  This check must
   1734 	 * be done with the cpu_lock held or kernel preemption
   1735 	 * disabled.  This check relies upon the fact that
   1736 	 * old cpu structures are not free'ed or cleared after
   1737 	 * then are removed from the cpu_list.
   1738 	 *
   1739 	 * Note that the clock code walks the cpu list dereferencing
   1740 	 * the cpu_part pointer, so we need to initialize it before
   1741 	 * adding the cpu to the list.
   1742 	 */
   1743 	cp->cpu_part = &cp_default;
   1744 	(void) pause_cpus(NULL);
   1745 	cp->cpu_next = cpu_list;
   1746 	cp->cpu_prev = cpu_list->cpu_prev;
   1747 	cpu_list->cpu_prev->cpu_next = cp;
   1748 	cpu_list->cpu_prev = cp;
   1749 	start_cpus();
   1750 
   1751 	for (seqid = 0; CPU_IN_SET(cpu_seqid_inuse, seqid); seqid++)
   1752 		continue;
   1753 	CPUSET_ADD(cpu_seqid_inuse, seqid);
   1754 	cp->cpu_seqid = seqid;
   1755 	ASSERT(ncpus < max_ncpus);
   1756 	ncpus++;
   1757 	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
   1758 	cpu[cp->cpu_id] = cp;
   1759 	CPUSET_ADD(cpu_available, cp->cpu_id);
   1760 	cpu_seq[cp->cpu_seqid] = cp;
   1761 
   1762 	/*
   1763 	 * allocate a pause thread for this CPU.
   1764 	 */
   1765 	cpu_pause_alloc(cp);
   1766 
   1767 	/*
   1768 	 * So that new CPUs won't have NULL prev_onln and next_onln pointers,
   1769 	 * link them into a list of just that CPU.
   1770 	 * This is so that disp_lowpri_cpu will work for thread_create in
   1771 	 * pause_cpus() when called from the startup thread in a new CPU.
   1772 	 */
   1773 	cp->cpu_next_onln = cp;
   1774 	cp->cpu_prev_onln = cp;
   1775 	cpu_info_kstat_create(cp);
   1776 	cp->cpu_next_part = cp;
   1777 	cp->cpu_prev_part = cp;
   1778 
   1779 	init_cpu_mstate(cp, CMS_SYSTEM);
   1780 
   1781 	pool_pset_mod = gethrtime();
   1782 }
   1783 
   1784 /*
   1785  * Do the opposite of cpu_add_unit().
   1786  */
   1787 void
   1788 cpu_del_unit(int cpuid)
   1789 {
   1790 	struct cpu	*cp, *cpnext;
   1791 
   1792 	ASSERT(MUTEX_HELD(&cpu_lock));
   1793 	cp = cpu[cpuid];
   1794 	ASSERT(cp != NULL);
   1795 
   1796 	ASSERT(cp->cpu_next_onln == cp);
   1797 	ASSERT(cp->cpu_prev_onln == cp);
   1798 	ASSERT(cp->cpu_next_part == cp);
   1799 	ASSERT(cp->cpu_prev_part == cp);
   1800 
   1801 	/*
   1802 	 * Tear down the CPU's physical ID cache, and update any
   1803 	 * processor groups
   1804 	 */
   1805 	pg_cpu_fini(cp);
   1806 	pghw_physid_destroy(cp);
   1807 
   1808 	/*
   1809 	 * Destroy kstat stuff.
   1810 	 */
   1811 	cpu_info_kstat_destroy(cp);
   1812 	term_cpu_mstate(cp);
   1813 	/*
   1814 	 * Free up pause thread.
   1815 	 */
   1816 	cpu_pause_free(cp);
   1817 	CPUSET_DEL(cpu_available, cp->cpu_id);
   1818 	cpu[cp->cpu_id] = NULL;
   1819 	cpu_seq[cp->cpu_seqid] = NULL;
   1820 
   1821 	/*
   1822 	 * The clock thread and mutex_vector_enter cannot hold the
   1823 	 * cpu_lock while traversing the cpu list, therefore we pause
   1824 	 * all other threads by pausing the other cpus. These, and any
   1825 	 * other routines holding cpu pointers while possibly sleeping
   1826 	 * must be sure to call kpreempt_disable before processing the
   1827 	 * list and be sure to check that the cpu has not been deleted
   1828 	 * after any sleeps (check cp->cpu_next != NULL). We guarantee
   1829 	 * to keep the deleted cpu structure around.
   1830 	 *
   1831 	 * Note that this MUST be done AFTER cpu_available
   1832 	 * has been updated so that we don't waste time
   1833 	 * trying to pause the cpu we're trying to delete.
   1834 	 */
   1835 	(void) pause_cpus(NULL);
   1836 
   1837 	cpnext = cp->cpu_next;
   1838 	cp->cpu_prev->cpu_next = cp->cpu_next;
   1839 	cp->cpu_next->cpu_prev = cp->cpu_prev;
   1840 	if (cp == cpu_list)
   1841 		cpu_list = cpnext;
   1842 
   1843 	/*
   1844 	 * Signals that the cpu has been deleted (see above).
   1845 	 */
   1846 	cp->cpu_next = NULL;
   1847 	cp->cpu_prev = NULL;
   1848 
   1849 	start_cpus();
   1850 
   1851 	CPUSET_DEL(cpu_seqid_inuse, cp->cpu_seqid);
   1852 	ncpus--;
   1853 	lgrp_config(LGRP_CONFIG_CPU_DEL, (uintptr_t)cp, 0);
   1854 
   1855 	pool_pset_mod = gethrtime();
   1856 }
   1857 
   1858 /*
   1859  * Add a CPU to the list of active CPUs.
   1860  *	This routine must not get any locks, because other CPUs are paused.
   1861  */
   1862 static void
   1863 cpu_add_active_internal(cpu_t *cp)
   1864 {
   1865 	cpupart_t	*pp = cp->cpu_part;
   1866 
   1867 	ASSERT(MUTEX_HELD(&cpu_lock));
   1868 	ASSERT(cpu_list != NULL);	/* list started in cpu_list_init */
   1869 
   1870 	ncpus_online++;
   1871 	cpu_set_state(cp);
   1872 	cp->cpu_next_onln = cpu_active;
   1873 	cp->cpu_prev_onln = cpu_active->cpu_prev_onln;
   1874 	cpu_active->cpu_prev_onln->cpu_next_onln = cp;
   1875 	cpu_active->cpu_prev_onln = cp;
   1876 
   1877 	if (pp->cp_cpulist) {
   1878 		cp->cpu_next_part = pp->cp_cpulist;
   1879 		cp->cpu_prev_part = pp->cp_cpulist->cpu_prev_part;
   1880 		pp->cp_cpulist->cpu_prev_part->cpu_next_part = cp;
   1881 		pp->cp_cpulist->cpu_prev_part = cp;
   1882 	} else {
   1883 		ASSERT(pp->cp_ncpus == 0);
   1884 		pp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
   1885 	}
   1886 	pp->cp_ncpus++;
   1887 	if (pp->cp_ncpus == 1) {
   1888 		cp_numparts_nonempty++;
   1889 		ASSERT(cp_numparts_nonempty != 0);
   1890 	}
   1891 
   1892 	pg_cpu_active(cp);
   1893 	lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)cp, 0);
   1894 
   1895 	bzero(&cp->cpu_loadavg, sizeof (cp->cpu_loadavg));
   1896 }
   1897 
   1898 /*
   1899  * Add a CPU to the list of active CPUs.
   1900  *	This is called from machine-dependent layers when a new CPU is started.
   1901  */
   1902 void
   1903 cpu_add_active(cpu_t *cp)
   1904 {
   1905 	pg_cpupart_in(cp, cp->cpu_part);
   1906 
   1907 	pause_cpus(NULL);
   1908 	cpu_add_active_internal(cp);
   1909 	start_cpus();
   1910 
   1911 	cpu_stats_kstat_create(cp);
   1912 	cpu_create_intrstat(cp);
   1913 	lgrp_kstat_create(cp);
   1914 	cpu_state_change_notify(cp->cpu_id, CPU_INIT);
   1915 }
   1916 
   1917 
   1918 /*
   1919  * Remove a CPU from the list of active CPUs.
   1920  *	This routine must not get any locks, because other CPUs are paused.
   1921  */
   1922 /* ARGSUSED */
   1923 static void
   1924 cpu_remove_active(cpu_t *cp)
   1925 {
   1926 	cpupart_t	*pp = cp->cpu_part;
   1927 
   1928 	ASSERT(MUTEX_HELD(&cpu_lock));
   1929 	ASSERT(cp->cpu_next_onln != cp);	/* not the last one */
   1930 	ASSERT(cp->cpu_prev_onln != cp);	/* not the last one */
   1931 
   1932 	pg_cpu_inactive(cp);
   1933 
   1934 	lgrp_config(LGRP_CONFIG_CPU_OFFLINE, (uintptr_t)cp, 0);
   1935 
   1936 	if (cp == clock_cpu_list)
   1937 		clock_cpu_list = cp->cpu_next_onln;
   1938 
   1939 	cp->cpu_prev_onln->cpu_next_onln = cp->cpu_next_onln;
   1940 	cp->cpu_next_onln->cpu_prev_onln = cp->cpu_prev_onln;
   1941 	if (cpu_active == cp) {
   1942 		cpu_active = cp->cpu_next_onln;
   1943 	}
   1944 	cp->cpu_next_onln = cp;
   1945 	cp->cpu_prev_onln = cp;
   1946 
   1947 	cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
   1948 	cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
   1949 	if (pp->cp_cpulist == cp) {
   1950 		pp->cp_cpulist = cp->cpu_next_part;
   1951 		ASSERT(pp->cp_cpulist != cp);
   1952 	}
   1953 	cp->cpu_next_part = cp;
   1954 	cp->cpu_prev_part = cp;
   1955 	pp->cp_ncpus--;
   1956 	if (pp->cp_ncpus == 0) {
   1957 		cp_numparts_nonempty--;
   1958 		ASSERT(cp_numparts_nonempty != 0);
   1959 	}
   1960 }
   1961 
   1962 /*
   1963  * Routine used to setup a newly inserted CPU in preparation for starting
   1964  * it running code.
   1965  */
   1966 int
   1967 cpu_configure(int cpuid)
   1968 {
   1969 	int retval = 0;
   1970 
   1971 	ASSERT(MUTEX_HELD(&cpu_lock));
   1972 
   1973 	/*
   1974 	 * Some structures are statically allocated based upon
   1975 	 * the maximum number of cpus the system supports.  Do not
   1976 	 * try to add anything beyond this limit.
   1977 	 */
   1978 	if (cpuid < 0 || cpuid >= NCPU) {
   1979 		return (EINVAL);
   1980 	}
   1981 
   1982 	if ((cpu[cpuid] != NULL) && (cpu[cpuid]->cpu_flags != 0)) {
   1983 		return (EALREADY);
   1984 	}
   1985 
   1986 	if ((retval = mp_cpu_configure(cpuid)) != 0) {
   1987 		return (retval);
   1988 	}
   1989 
   1990 	cpu[cpuid]->cpu_flags = CPU_QUIESCED | CPU_OFFLINE | CPU_POWEROFF;
   1991 	cpu_set_state(cpu[cpuid]);
   1992 	retval = cpu_state_change_hooks(cpuid, CPU_CONFIG, CPU_UNCONFIG);
   1993 	if (retval != 0)
   1994 		(void) mp_cpu_unconfigure(cpuid);
   1995 
   1996 	return (retval);
   1997 }
   1998 
   1999 /*
   2000  * Routine used to cleanup a CPU that has been powered off.  This will
   2001  * destroy all per-cpu information related to this cpu.
   2002  */
   2003 int
   2004 cpu_unconfigure(int cpuid)
   2005 {
   2006 	int error;
   2007 
   2008 	ASSERT(MUTEX_HELD(&cpu_lock));
   2009 
   2010 	if (cpu[cpuid] == NULL) {
   2011 		return (ENODEV);
   2012 	}
   2013 
   2014 	if (cpu[cpuid]->cpu_flags == 0) {
   2015 		return (EALREADY);
   2016 	}
   2017 
   2018 	if ((cpu[cpuid]->cpu_flags & CPU_POWEROFF) == 0) {
   2019 		return (EBUSY);
   2020 	}
   2021 
   2022 	if (cpu[cpuid]->cpu_props != NULL) {
   2023 		(void) nvlist_free(cpu[cpuid]->cpu_props);
   2024 		cpu[cpuid]->cpu_props = NULL;
   2025 	}
   2026 
   2027 	error = cpu_state_change_hooks(cpuid, CPU_UNCONFIG, CPU_CONFIG);
   2028 
   2029 	if (error != 0)
   2030 		return (error);
   2031 
   2032 	return (mp_cpu_unconfigure(cpuid));
   2033 }
   2034 
   2035 /*
   2036  * Routines for registering and de-registering cpu_setup callback functions.
   2037  *
   2038  * Caller's context
   2039  *	These routines must not be called from a driver's attach(9E) or
   2040  *	detach(9E) entry point.
   2041  *
   2042  * NOTE: CPU callbacks should not block. They are called with cpu_lock held.
   2043  */
   2044 
   2045 /*
   2046  * Ideally, these would be dynamically allocated and put into a linked
   2047  * list; however that is not feasible because the registration routine
   2048  * has to be available before the kmem allocator is working (in fact,
   2049  * it is called by the kmem allocator init code).  In any case, there
   2050  * are quite a few extra entries for future users.
   2051  */
   2052 #define	NCPU_SETUPS	20
   2053 
   2054 struct cpu_setup {
   2055 	cpu_setup_func_t *func;
   2056 	void *arg;
   2057 } cpu_setups[NCPU_SETUPS];
   2058 
   2059 void
   2060 register_cpu_setup_func(cpu_setup_func_t *func, void *arg)
   2061 {
   2062 	int i;
   2063 
   2064 	ASSERT(MUTEX_HELD(&cpu_lock));
   2065 
   2066 	for (i = 0; i < NCPU_SETUPS; i++)
   2067 		if (cpu_setups[i].func == NULL)
   2068 			break;
   2069 	if (i >= NCPU_SETUPS)
   2070 		cmn_err(CE_PANIC, "Ran out of cpu_setup callback entries");
   2071 
   2072 	cpu_setups[i].func = func;
   2073 	cpu_setups[i].arg = arg;
   2074 }
   2075 
   2076 void
   2077 unregister_cpu_setup_func(cpu_setup_func_t *func, void *arg)
   2078 {
   2079 	int i;
   2080 
   2081 	ASSERT(MUTEX_HELD(&cpu_lock));
   2082 
   2083 	for (i = 0; i < NCPU_SETUPS; i++)
   2084 		if ((cpu_setups[i].func == func) &&
   2085 		    (cpu_setups[i].arg == arg))
   2086 			break;
   2087 	if (i >= NCPU_SETUPS)
   2088 		cmn_err(CE_PANIC, "Could not find cpu_setup callback to "
   2089 		    "deregister");
   2090 
   2091 	cpu_setups[i].func = NULL;
   2092 	cpu_setups[i].arg = 0;
   2093 }
   2094 
   2095 /*
   2096  * Call any state change hooks for this CPU, ignore any errors.
   2097  */
   2098 void
   2099 cpu_state_change_notify(int id, cpu_setup_t what)
   2100 {
   2101 	int i;
   2102 
   2103 	ASSERT(MUTEX_HELD(&cpu_lock));
   2104 
   2105 	for (i = 0; i < NCPU_SETUPS; i++) {
   2106 		if (cpu_setups[i].func != NULL) {
   2107 			cpu_setups[i].func(what, id, cpu_setups[i].arg);
   2108 		}
   2109 	}
   2110 }
   2111 
   2112 /*
   2113  * Call any state change hooks for this CPU, undo it if error found.
   2114  */
   2115 static int
   2116 cpu_state_change_hooks(int id, cpu_setup_t what, cpu_setup_t undo)
   2117 {
   2118 	int i;
   2119 	int retval = 0;
   2120 
   2121 	ASSERT(MUTEX_HELD(&cpu_lock));
   2122 
   2123 	for (i = 0; i < NCPU_SETUPS; i++) {
   2124 		if (cpu_setups[i].func != NULL) {
   2125 			retval = cpu_setups[i].func(what, id,
   2126 			    cpu_setups[i].arg);
   2127 			if (retval) {
   2128 				for (i--; i >= 0; i--) {
   2129 					if (cpu_setups[i].func != NULL)
   2130 						cpu_setups[i].func(undo,
   2131 						    id, cpu_setups[i].arg);
   2132 				}
   2133 				break;
   2134 			}
   2135 		}
   2136 	}
   2137 	return (retval);
   2138 }
   2139 
   2140 /*
   2141  * Export information about this CPU via the kstat mechanism.
   2142  */
   2143 static struct {
   2144 	kstat_named_t ci_state;
   2145 	kstat_named_t ci_state_begin;
   2146 	kstat_named_t ci_cpu_type;
   2147 	kstat_named_t ci_fpu_type;
   2148 	kstat_named_t ci_clock_MHz;
   2149 	kstat_named_t ci_chip_id;
   2150 	kstat_named_t ci_implementation;
   2151 	kstat_named_t ci_brandstr;
   2152 	kstat_named_t ci_core_id;
   2153 	kstat_named_t ci_curr_clock_Hz;
   2154 	kstat_named_t ci_supp_freq_Hz;
   2155 #if defined(__sparcv9)
   2156 	kstat_named_t ci_device_ID;
   2157 	kstat_named_t ci_cpu_fru;
   2158 #endif
   2159 #if defined(__x86)
   2160 	kstat_named_t ci_vendorstr;
   2161 	kstat_named_t ci_family;
   2162 	kstat_named_t ci_model;
   2163 	kstat_named_t ci_step;
   2164 	kstat_named_t ci_clogid;
   2165 	kstat_named_t ci_pkg_core_id;
   2166 	kstat_named_t ci_ncpuperchip;
   2167 	kstat_named_t ci_ncoreperchip;
   2168 	kstat_named_t ci_max_cstates;
   2169 	kstat_named_t ci_curr_cstate;
   2170 	kstat_named_t ci_sktstr;
   2171 #endif
   2172 } cpu_info_template = {
   2173 	{ "state",			KSTAT_DATA_CHAR },
   2174 	{ "state_begin",		KSTAT_DATA_LONG },
   2175 	{ "cpu_type",			KSTAT_DATA_CHAR },
   2176 	{ "fpu_type",			KSTAT_DATA_CHAR },
   2177 	{ "clock_MHz",			KSTAT_DATA_LONG },
   2178 	{ "chip_id",			KSTAT_DATA_LONG },
   2179 	{ "implementation",		KSTAT_DATA_STRING },
   2180 	{ "brand",			KSTAT_DATA_STRING },
   2181 	{ "core_id",			KSTAT_DATA_LONG },
   2182 	{ "current_clock_Hz",		KSTAT_DATA_UINT64 },
   2183 	{ "supported_frequencies_Hz",	KSTAT_DATA_STRING },
   2184 #if defined(__sparcv9)
   2185 	{ "device_ID",			KSTAT_DATA_UINT64 },
   2186 	{ "cpu_fru",			KSTAT_DATA_STRING },
   2187 #endif
   2188 #if defined(__x86)
   2189 	{ "vendor_id",			KSTAT_DATA_STRING },
   2190 	{ "family",			KSTAT_DATA_INT32 },
   2191 	{ "model",			KSTAT_DATA_INT32 },
   2192 	{ "stepping",			KSTAT_DATA_INT32 },
   2193 	{ "clog_id",			KSTAT_DATA_INT32 },
   2194 	{ "pkg_core_id",		KSTAT_DATA_LONG },
   2195 	{ "ncpu_per_chip",		KSTAT_DATA_INT32 },
   2196 	{ "ncore_per_chip",		KSTAT_DATA_INT32 },
   2197 	{ "supported_max_cstates",	KSTAT_DATA_INT32 },
   2198 	{ "current_cstate",		KSTAT_DATA_INT32 },
   2199 	{ "socket_type",		KSTAT_DATA_STRING },
   2200 #endif
   2201 };
   2202 
   2203 static kmutex_t cpu_info_template_lock;
   2204 
   2205 static int
   2206 cpu_info_kstat_update(kstat_t *ksp, int rw)
   2207 {
   2208 	cpu_t	*cp = ksp->ks_private;
   2209 	const char *pi_state;
   2210 
   2211 	if (rw == KSTAT_WRITE)
   2212 		return (EACCES);
   2213 
   2214 #if defined(__x86)
   2215 	/* Is the cpu still initialising itself? */
   2216 	if (cpuid_checkpass(cp, 1) == 0)
   2217 		return (ENXIO);
   2218 #endif
   2219 	switch (cp->cpu_type_info.pi_state) {
   2220 	case P_ONLINE:
   2221 		pi_state = PS_ONLINE;
   2222 		break;
   2223 	case P_POWEROFF:
   2224 		pi_state = PS_POWEROFF;
   2225 		break;
   2226 	case P_NOINTR:
   2227 		pi_state = PS_NOINTR;
   2228 		break;
   2229 	case P_FAULTED:
   2230 		pi_state = PS_FAULTED;
   2231 		break;
   2232 	case P_SPARE:
   2233 		pi_state = PS_SPARE;
   2234 		break;
   2235 	case P_OFFLINE:
   2236 		pi_state = PS_OFFLINE;
   2237 		break;
   2238 	default:
   2239 		pi_state = "unknown";
   2240 	}
   2241 	(void) strcpy(cpu_info_template.ci_state.value.c, pi_state);
   2242 	cpu_info_template.ci_state_begin.value.l = cp->cpu_state_begin;
   2243 	(void) strncpy(cpu_info_template.ci_cpu_type.value.c,
   2244 	    cp->cpu_type_info.pi_processor_type, 15);
   2245 	(void) strncpy(cpu_info_template.ci_fpu_type.value.c,
   2246 	    cp->cpu_type_info.pi_fputypes, 15);
   2247 	cpu_info_template.ci_clock_MHz.value.l = cp->cpu_type_info.pi_clock;
   2248 	cpu_info_template.ci_chip_id.value.l =
   2249 	    pg_plat_hw_instance_id(cp, PGHW_CHIP);
   2250 	kstat_named_setstr(&cpu_info_template.ci_implementation,
   2251 	    cp->cpu_idstr);
   2252 	kstat_named_setstr(&cpu_info_template.ci_brandstr, cp->cpu_brandstr);
   2253 	cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp);
   2254 	cpu_info_template.ci_curr_clock_Hz.value.ui64 =
   2255 	    cp->cpu_curr_clock;
   2256 	kstat_named_setstr(&cpu_info_template.ci_supp_freq_Hz,
   2257 	    cp->cpu_supp_freqs);
   2258 #if defined(__sparcv9)
   2259 	cpu_info_template.ci_device_ID.value.ui64 =
   2260 	    cpunodes[cp->cpu_id].device_id;
   2261 	kstat_named_setstr(&cpu_info_template.ci_cpu_fru, cpu_fru_fmri(cp));
   2262 #endif
   2263 #if defined(__x86)
   2264 	kstat_named_setstr(&cpu_info_template.ci_vendorstr,
   2265 	    cpuid_getvendorstr(cp));
   2266 	cpu_info_template.ci_family.value.l = cpuid_getfamily(cp);
   2267 	cpu_info_template.ci_model.value.l = cpuid_getmodel(cp);
   2268 	cpu_info_template.ci_step.value.l = cpuid_getstep(cp);
   2269 	cpu_info_template.ci_clogid.value.l = cpuid_get_clogid(cp);
   2270 	cpu_info_template.ci_ncpuperchip.value.l = cpuid_get_ncpu_per_chip(cp);
   2271 	cpu_info_template.ci_ncoreperchip.value.l =
   2272 	    cpuid_get_ncore_per_chip(cp);
   2273 	cpu_info_template.ci_pkg_core_id.value.l = cpuid_get_pkgcoreid(cp);
   2274 	cpu_info_template.ci_max_cstates.value.l = cp->cpu_m.max_cstates;
   2275 	cpu_info_template.ci_curr_cstate.value.l = cpu_idle_get_cpu_state(cp);
   2276 	kstat_named_setstr(&cpu_info_template.ci_sktstr,
   2277 	    cpuid_getsocketstr(cp));
   2278 #endif
   2279 
   2280 	return (0);
   2281 }
   2282 
   2283 static void
   2284 cpu_info_kstat_create(cpu_t *cp)
   2285 {
   2286 	zoneid_t zoneid;
   2287 
   2288 	ASSERT(MUTEX_HELD(&cpu_lock));
   2289 
   2290 	if (pool_pset_enabled())
   2291 		zoneid = GLOBAL_ZONEID;
   2292 	else
   2293 		zoneid = ALL_ZONES;
   2294 	if ((cp->cpu_info_kstat = kstat_create_zone("cpu_info", cp->cpu_id,
   2295 	    NULL, "misc", KSTAT_TYPE_NAMED,
   2296 	    sizeof (cpu_info_template) / sizeof (kstat_named_t),
   2297 	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE, zoneid)) != NULL) {
   2298 		cp->cpu_info_kstat->ks_data_size += 2 * CPU_IDSTRLEN;
   2299 #if defined(__sparcv9)
   2300 		cp->cpu_info_kstat->ks_data_size +=
   2301 		    strlen(cpu_fru_fmri(cp)) + 1;
   2302 #endif
   2303 #if defined(__x86)
   2304 		cp->cpu_info_kstat->ks_data_size += X86_VENDOR_STRLEN;
   2305 #endif
   2306 		if (cp->cpu_supp_freqs != NULL)
   2307 			cp->cpu_info_kstat->ks_data_size +=
   2308 			    strlen(cp->cpu_supp_freqs) + 1;
   2309 		cp->cpu_info_kstat->ks_lock = &cpu_info_template_lock;
   2310 		cp->cpu_info_kstat->ks_data = &cpu_info_template;
   2311 		cp->cpu_info_kstat->ks_private = cp;
   2312 		cp->cpu_info_kstat->ks_update = cpu_info_kstat_update;
   2313 		kstat_install(cp->cpu_info_kstat);
   2314 	}
   2315 }
   2316 
   2317 static void
   2318 cpu_info_kstat_destroy(cpu_t *cp)
   2319 {
   2320 	ASSERT(MUTEX_HELD(&cpu_lock));
   2321 
   2322 	kstat_delete(cp->cpu_info_kstat);
   2323 	cp->cpu_info_kstat = NULL;
   2324 }
   2325 
   2326 /*
   2327  * Create and install kstats for the boot CPU.
   2328  */
   2329 void
   2330 cpu_kstat_init(cpu_t *cp)
   2331 {
   2332 	mutex_enter(&cpu_lock);
   2333 	cpu_info_kstat_create(cp);
   2334 	cpu_stats_kstat_create(cp);
   2335 	cpu_create_intrstat(cp);
   2336 	cpu_set_state(cp);
   2337 	mutex_exit(&cpu_lock);
   2338 }
   2339 
   2340 /*
   2341  * Make visible to the zone that subset of the cpu information that would be
   2342  * initialized when a cpu is configured (but still offline).
   2343  */
   2344 void
   2345 cpu_visibility_configure(cpu_t *cp, zone_t *zone)
   2346 {
   2347 	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
   2348 
   2349 	ASSERT(MUTEX_HELD(&cpu_lock));
   2350 	ASSERT(pool_pset_enabled());
   2351 	ASSERT(cp != NULL);
   2352 
   2353 	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
   2354 		zone->zone_ncpus++;
   2355 		ASSERT(zone->zone_ncpus <= ncpus);
   2356 	}
   2357 	if (cp->cpu_info_kstat != NULL)
   2358 		kstat_zone_add(cp->cpu_info_kstat, zoneid);
   2359 }
   2360 
   2361 /*
   2362  * Make visible to the zone that subset of the cpu information that would be
   2363  * initialized when a previously configured cpu is onlined.
   2364  */
   2365 void
   2366 cpu_visibility_online(cpu_t *cp, zone_t *zone)
   2367 {
   2368 	kstat_t *ksp;
   2369 	char name[sizeof ("cpu_stat") + 10];	/* enough for 32-bit cpuids */
   2370 	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
   2371 	processorid_t cpun;
   2372 
   2373 	ASSERT(MUTEX_HELD(&cpu_lock));
   2374 	ASSERT(pool_pset_enabled());
   2375 	ASSERT(cp != NULL);
   2376 	ASSERT(cpu_is_active(cp));
   2377 
   2378 	cpun = cp->cpu_id;
   2379 	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
   2380 		zone->zone_ncpus_online++;
   2381 		ASSERT(zone->zone_ncpus_online <= ncpus_online);
   2382 	}
   2383 	(void) snprintf(name, sizeof (name), "cpu_stat%d", cpun);
   2384 	if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES))
   2385 	    != NULL) {
   2386 		kstat_zone_add(ksp, zoneid);
   2387 		kstat_rele(ksp);
   2388 	}
   2389 	if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) {
   2390 		kstat_zone_add(ksp, zoneid);
   2391 		kstat_rele(ksp);
   2392 	}
   2393 	if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) {
   2394 		kstat_zone_add(ksp, zoneid);
   2395 		kstat_rele(ksp);
   2396 	}
   2397 	if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) !=
   2398 	    NULL) {
   2399 		kstat_zone_add(ksp, zoneid);
   2400 		kstat_rele(ksp);
   2401 	}
   2402 }
   2403 
   2404 /*
   2405  * Update relevant kstats such that cpu is now visible to processes
   2406  * executing in specified zone.
   2407  */
   2408 void
   2409 cpu_visibility_add(cpu_t *cp, zone_t *zone)
   2410 {
   2411 	cpu_visibility_configure(cp, zone);
   2412 	if (cpu_is_active(cp))
   2413 		cpu_visibility_online(cp, zone);
   2414 }
   2415 
   2416 /*
   2417  * Make invisible to the zone that subset of the cpu information that would be
   2418  * torn down when a previously offlined cpu is unconfigured.
   2419  */
   2420 void
   2421 cpu_visibility_unconfigure(cpu_t *cp, zone_t *zone)
   2422 {
   2423 	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
   2424 
   2425 	ASSERT(MUTEX_HELD(&cpu_lock));
   2426 	ASSERT(pool_pset_enabled());
   2427 	ASSERT(cp != NULL);
   2428 
   2429 	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
   2430 		ASSERT(zone->zone_ncpus != 0);
   2431 		zone->zone_ncpus--;
   2432 	}
   2433 	if (cp->cpu_info_kstat)
   2434 		kstat_zone_remove(cp->cpu_info_kstat, zoneid);
   2435 }
   2436 
   2437 /*
   2438  * Make invisible to the zone that subset of the cpu information that would be
   2439  * torn down when a cpu is offlined (but still configured).
   2440  */
   2441 void
   2442 cpu_visibility_offline(cpu_t *cp, zone_t *zone)
   2443 {
   2444 	kstat_t *ksp;
   2445 	char name[sizeof ("cpu_stat") + 10];	/* enough for 32-bit cpuids */
   2446 	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
   2447 	processorid_t cpun;
   2448 
   2449 	ASSERT(MUTEX_HELD(&cpu_lock));
   2450 	ASSERT(pool_pset_enabled());
   2451 	ASSERT(cp != NULL);
   2452 	ASSERT(cpu_is_active(cp));
   2453 
   2454 	cpun = cp->cpu_id;
   2455 	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
   2456 		ASSERT(zone->zone_ncpus_online != 0);
   2457 		zone->zone_ncpus_online--;
   2458 	}
   2459 
   2460 	if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) !=
   2461 	    NULL) {
   2462 		kstat_zone_remove(ksp, zoneid);
   2463 		kstat_rele(ksp);
   2464 	}
   2465 	if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) {
   2466 		kstat_zone_remove(ksp, zoneid);
   2467 		kstat_rele(ksp);
   2468 	}
   2469 	if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) {
   2470 		kstat_zone_remove(ksp, zoneid);
   2471 		kstat_rele(ksp);
   2472 	}
   2473 	(void) snprintf(name, sizeof (name), "cpu_stat%d", cpun);
   2474 	if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES))
   2475 	    != NULL) {
   2476 		kstat_zone_remove(ksp, zoneid);
   2477 		kstat_rele(ksp);
   2478 	}
   2479 }
   2480 
   2481 /*
   2482  * Update relevant kstats such that cpu is no longer visible to processes
   2483  * executing in specified zone.
   2484  */
   2485 void
   2486 cpu_visibility_remove(cpu_t *cp, zone_t *zone)
   2487 {
   2488 	if (cpu_is_active(cp))
   2489 		cpu_visibility_offline(cp, zone);
   2490 	cpu_visibility_unconfigure(cp, zone);
   2491 }
   2492 
   2493 /*
   2494  * Bind a thread to a CPU as requested.
   2495  */
   2496 int
   2497 cpu_bind_thread(kthread_id_t tp, processorid_t bind, processorid_t *obind,
   2498     int *error)
   2499 {
   2500 	processorid_t	binding;
   2501 	cpu_t		*cp = NULL;
   2502 
   2503 	ASSERT(MUTEX_HELD(&cpu_lock));
   2504 	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
   2505 
   2506 	thread_lock(tp);
   2507 
   2508 	/*
   2509 	 * Record old binding, but change the obind, which was initialized
   2510 	 * to PBIND_NONE, only if this thread has a binding.  This avoids
   2511 	 * reporting PBIND_NONE for a process when some LWPs are bound.
   2512 	 */
   2513 	binding = tp->t_bind_cpu;
   2514 	if (binding != PBIND_NONE)
   2515 		*obind = binding;	/* record old binding */
   2516 
   2517 	switch (bind) {
   2518 	case PBIND_QUERY:
   2519 		/* Just return the old binding */
   2520 		thread_unlock(tp);
   2521 		return (0);
   2522 
   2523 	case PBIND_QUERY_TYPE:
   2524 		/* Return the binding type */
   2525 		*obind = TB_CPU_IS_SOFT(tp) ? PBIND_SOFT : PBIND_HARD;
   2526 		thread_unlock(tp);
   2527 		return (0);
   2528 
   2529 	case PBIND_SOFT:
   2530 		/*
   2531 		 *  Set soft binding for this thread and return the actual
   2532 		 *  binding
   2533 		 */
   2534 		TB_CPU_SOFT_SET(tp);
   2535 		thread_unlock(tp);
   2536 		return (0);
   2537 
   2538 	case PBIND_HARD:
   2539 		/*
   2540 		 *  Set hard binding for this thread and return the actual
   2541 		 *  binding
   2542 		 */
   2543 		TB_CPU_HARD_SET(tp);
   2544 		thread_unlock(tp);
   2545 		return (0);
   2546 
   2547 	default:
   2548 		break;
   2549 	}
   2550 
   2551 	/*
   2552 	 * If this thread/LWP cannot be bound because of permission
   2553 	 * problems, just note that and return success so that the
   2554 	 * other threads/LWPs will be bound.  This is the way
   2555 	 * processor_bind() is defined to work.
   2556 	 *
   2557 	 * Binding will get EPERM if the thread is of system class
   2558 	 * or hasprocperm() fails.
   2559 	 */
   2560 	if (tp->t_cid == 0 || !hasprocperm(tp->t_cred, CRED())) {
   2561 		*error = EPERM;
   2562 		thread_unlock(tp);
   2563 		return (0);
   2564 	}
   2565 
   2566 	binding = bind;
   2567 	if (binding != PBIND_NONE) {
   2568 		cp = cpu_get((processorid_t)binding);
   2569 		/*
   2570 		 * Make sure binding is valid and is in right partition.
   2571 		 */
   2572 		if (cp == NULL || tp->t_cpupart != cp->cpu_part) {
   2573 			*error = EINVAL;
   2574 			thread_unlock(tp);
   2575 			return (0);
   2576 		}
   2577 	}
   2578 	tp->t_bind_cpu = binding;	/* set new binding */
   2579 
   2580 	/*
   2581 	 * If there is no system-set reason for affinity, set
   2582 	 * the t_bound_cpu field to reflect the binding.
   2583 	 */
   2584 	if (tp->t_affinitycnt == 0) {
   2585 		if (binding == PBIND_NONE) {
   2586 			/*
   2587 			 * We may need to adjust disp_max_unbound_pri
   2588 			 * since we're becoming unbound.
   2589 			 */
   2590 			disp_adjust_unbound_pri(tp);
   2591 
   2592 			tp->t_bound_cpu = NULL;	/* set new binding */
   2593 
   2594 			/*
   2595 			 * Move thread to lgroup with strongest affinity
   2596 			 * after unbinding
   2597 			 */
   2598 			if (tp->t_lgrp_affinity)
   2599 				lgrp_move_thread(tp,
   2600 				    lgrp_choose(tp, tp->t_cpupart), 1);
   2601 
   2602 			if (tp->t_state == TS_ONPROC &&
   2603 			    tp->t_cpu->cpu_part != tp->t_cpupart)
   2604 				cpu_surrender(tp);
   2605 		} else {
   2606 			lpl_t	*lpl;
   2607 
   2608 			tp->t_bound_cpu = cp;
   2609 			ASSERT(cp->cpu_lpl != NULL);
   2610 
   2611 			/*
   2612 			 * Set home to lgroup with most affinity containing CPU
   2613 			 * that thread is being bound or minimum bounding
   2614 			 * lgroup if no affinities set
   2615 			 */
   2616 			if (tp->t_lgrp_affinity)
   2617 				lpl = lgrp_affinity_best(tp, tp->t_cpupart,
   2618 				    LGRP_NONE, B_FALSE);
   2619 			else
   2620 				lpl = cp->cpu_lpl;
   2621 
   2622 			if (tp->t_lpl != lpl) {
   2623 				/* can't grab cpu_lock */
   2624 				lgrp_move_thread(tp, lpl, 1);
   2625 			}
   2626 
   2627 			/*
   2628 			 * Make the thread switch to the bound CPU.
   2629 			 * If the thread is runnable, we need to
   2630 			 * requeue it even if t_cpu is already set
   2631 			 * to the right CPU, since it may be on a
   2632 			 * kpreempt queue and need to move to a local
   2633 			 * queue.  We could check t_disp_queue to
   2634 			 * avoid unnecessary overhead if it's already
   2635 			 * on the right queue, but since this isn't
   2636 			 * a performance-critical operation it doesn't
   2637 			 * seem worth the extra code and complexity.
   2638 			 *
   2639 			 * If the thread is weakbound to the cpu then it will
   2640 			 * resist the new binding request until the weak
   2641 			 * binding drops.  The cpu_surrender or requeueing
   2642 			 * below could be skipped in such cases (since it
   2643 			 * will have no effect), but that would require
   2644 			 * thread_allowmigrate to acquire thread_lock so
   2645 			 * we'll take the very occasional hit here instead.
   2646 			 */
   2647 			if (tp->t_state == TS_ONPROC) {
   2648 				cpu_surrender(tp);
   2649 			} else if (tp->t_state == TS_RUN) {
   2650 				cpu_t *ocp = tp->t_cpu;
   2651 
   2652 				(void) dispdeq(tp);
   2653 				setbackdq(tp);
   2654 				/*
   2655 				 * Either on the bound CPU's disp queue now,
   2656 				 * or swapped out or on the swap queue.
   2657 				 */
   2658 				ASSERT(tp->t_disp_queue == cp->cpu_disp ||
   2659 				    tp->t_weakbound_cpu == ocp ||
   2660 				    (tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ))
   2661 				    != TS_LOAD);
   2662 			}
   2663 		}
   2664 	}
   2665 
   2666 	/*
   2667 	 * Our binding has changed; set TP_CHANGEBIND.
   2668 	 */
   2669 	tp->t_proc_flag |= TP_CHANGEBIND;
   2670 	aston(tp);
   2671 
   2672 	thread_unlock(tp);
   2673 
   2674 	return (0);
   2675 }
   2676 
   2677 #if CPUSET_WORDS > 1
   2678 
   2679 /*
   2680  * Functions for implementing cpuset operations when a cpuset is more
   2681  * than one word.  On platforms where a cpuset is a single word these
   2682  * are implemented as macros in cpuvar.h.
   2683  */
   2684 
   2685 void
   2686 cpuset_all(cpuset_t *s)
   2687 {
   2688 	int i;
   2689 
   2690 	for (i = 0; i < CPUSET_WORDS; i++)
   2691 		s->cpub[i] = ~0UL;
   2692 }
   2693 
   2694 void
   2695 cpuset_all_but(cpuset_t *s, uint_t cpu)
   2696 {
   2697 	cpuset_all(s);
   2698 	CPUSET_DEL(*s, cpu);
   2699 }
   2700 
   2701 void
   2702 cpuset_only(cpuset_t *s, uint_t cpu)
   2703 {
   2704 	CPUSET_ZERO(*s);
   2705 	CPUSET_ADD(*s, cpu);
   2706 }
   2707 
   2708 int
   2709 cpuset_isnull(cpuset_t *s)
   2710 {
   2711 	int i;
   2712 
   2713 	for (i = 0; i < CPUSET_WORDS; i++)
   2714 		if (s->cpub[i] != 0)
   2715 			return (0);
   2716 	return (1);
   2717 }
   2718 
   2719 int
   2720 cpuset_cmp(cpuset_t *s1, cpuset_t *s2)
   2721 {
   2722 	int i;
   2723 
   2724 	for (i = 0; i < CPUSET_WORDS; i++)
   2725 		if (s1->cpub[i] != s2->cpub[i])
   2726 			return (0);
   2727 	return (1);
   2728 }
   2729 
   2730 uint_t
   2731 cpuset_find(cpuset_t *s)
   2732 {
   2733 
   2734 	uint_t	i;
   2735 	uint_t	cpu = (uint_t)-1;
   2736 
   2737 	/*
   2738 	 * Find a cpu in the cpuset
   2739 	 */
   2740 	for (i = 0; i < CPUSET_WORDS; i++) {
   2741 		cpu = (uint_t)(lowbit(s->cpub[i]) - 1);
   2742 		if (cpu != (uint_t)-1) {
   2743 			cpu += i * BT_NBIPUL;
   2744 			break;
   2745 		}
   2746 	}
   2747 	return (cpu);
   2748 }
   2749 
   2750 void
   2751 cpuset_bounds(cpuset_t *s, uint_t *smallestid, uint_t *largestid)
   2752 {
   2753 	int	i, j;
   2754 	uint_t	bit;
   2755 
   2756 	/*
   2757 	 * First, find the smallest cpu id in the set.
   2758 	 */
   2759 	for (i = 0; i < CPUSET_WORDS; i++) {
   2760 		if (s->cpub[i] != 0) {
   2761 			bit = (uint_t)(lowbit(s->cpub[i]) - 1);
   2762 			ASSERT(bit != (uint_t)-1);
   2763 			*smallestid = bit + (i * BT_NBIPUL);
   2764 
   2765 			/*
   2766 			 * Now find the largest cpu id in
   2767 			 * the set and return immediately.
   2768 			 * Done in an inner loop to avoid
   2769 			 * having to break out of the first
   2770 			 * loop.
   2771 			 */
   2772 			for (j = CPUSET_WORDS - 1; j >= i; j--) {
   2773 				if (s->cpub[j] != 0) {
   2774 					bit = (uint_t)(highbit(s->cpub[j]) - 1);
   2775 					ASSERT(bit != (uint_t)-1);
   2776 					*largestid = bit + (j * BT_NBIPUL);
   2777 					ASSERT(*largestid >= *smallestid);
   2778 					return;
   2779 				}
   2780 			}
   2781 
   2782 			/*
   2783 			 * If this code is reached, a
   2784 			 * smallestid was found, but not a
   2785 			 * largestid. The cpuset must have
   2786 			 * been changed during the course
   2787 			 * of this function call.
   2788 			 */
   2789 			ASSERT(0);
   2790 		}
   2791 	}
   2792 	*smallestid = *largestid = CPUSET_NOTINSET;
   2793 }
   2794 
   2795 #endif	/* CPUSET_WORDS */
   2796 
   2797 /*
   2798  * Unbind threads bound to specified CPU.
   2799  *
   2800  * If `unbind_all_threads' is true, unbind all user threads bound to a given
   2801  * CPU. Otherwise unbind all soft-bound user threads.
   2802  */
   2803 int
   2804 cpu_unbind(processorid_t cpu, boolean_t unbind_all_threads)
   2805 {
   2806 	processorid_t obind;
   2807 	kthread_t *tp;
   2808 	int ret = 0;
   2809 	proc_t *pp;
   2810 	int err, berr = 0;
   2811 
   2812 	ASSERT(MUTEX_HELD(&cpu_lock));
   2813 
   2814 	mutex_enter(&pidlock);
   2815 	for (pp = practive; pp != NULL; pp = pp->p_next) {
   2816 		mutex_enter(&pp->p_lock);
   2817 		tp = pp->p_tlist;
   2818 		/*
   2819 		 * Skip zombies, kernel processes, and processes in
   2820 		 * other zones, if called from a non-global zone.
   2821 		 */
   2822 		if (tp == NULL || (pp->p_flag & SSYS) ||
   2823 		    !HASZONEACCESS(curproc, pp->p_zone->zone_id)) {
   2824 			mutex_exit(&pp->p_lock);
   2825 			continue;
   2826 		}
   2827 		do {
   2828 			if (tp->t_bind_cpu != cpu)
   2829 				continue;
   2830 			/*
   2831 			 * Skip threads with hard binding when
   2832 			 * `unbind_all_threads' is not specified.
   2833 			 */
   2834 			if (!unbind_all_threads && TB_CPU_IS_HARD(tp))
   2835 				continue;
   2836 			err = cpu_bind_thread(tp, PBIND_NONE, &obind, &berr);
   2837 			if (ret == 0)
   2838 				ret = err;
   2839 		} while ((tp = tp->t_forw) != pp->p_tlist);
   2840 		mutex_exit(&pp->p_lock);
   2841 	}
   2842 	mutex_exit(&pidlock);
   2843 	if (ret == 0)
   2844 		ret = berr;
   2845 	return (ret);
   2846 }
   2847 
   2848 
   2849 /*
   2850  * Destroy all remaining bound threads on a cpu.
   2851  */
   2852 void
   2853 cpu_destroy_bound_threads(cpu_t *cp)
   2854 {
   2855 	extern id_t syscid;
   2856 	register kthread_id_t	t, tlist, tnext;
   2857 
   2858 	/*
   2859 	 * Destroy all remaining bound threads on the cpu.  This
   2860 	 * should include both the interrupt threads and the idle thread.
   2861 	 * This requires some care, since we need to traverse the
   2862 	 * thread list with the pidlock mutex locked, but thread_free
   2863 	 * also locks the pidlock mutex.  So, we collect the threads
   2864 	 * we're going to reap in a list headed by "tlist", then we
   2865 	 * unlock the pidlock mutex and traverse the tlist list,
   2866 	 * doing thread_free's on the thread's.	 Simple, n'est pas?
   2867 	 * Also, this depends on thread_free not mucking with the
   2868 	 * t_next and t_prev links of the thread.
   2869 	 */
   2870 
   2871 	if ((t = curthread) != NULL) {
   2872 
   2873 		tlist = NULL;
   2874 		mutex_enter(&pidlock);
   2875 		do {
   2876 			tnext = t->t_next;
   2877 			if (t->t_bound_cpu == cp) {
   2878 
   2879 				/*
   2880 				 * We've found a bound thread, carefully unlink
   2881 				 * it out of the thread list, and add it to
   2882 				 * our "tlist".	 We "know" we don't have to
   2883 				 * worry about unlinking curthread (the thread
   2884 				 * that is executing this code).
   2885 				 */
   2886 				t->t_next->t_prev = t->t_prev;
   2887 				t->t_prev->t_next = t->t_next;
   2888 				t->t_next = tlist;
   2889 				tlist = t;
   2890 				ASSERT(t->t_cid == syscid);
   2891 				/* wake up anyone blocked in thread_join */
   2892 				cv_broadcast(&t->t_joincv);
   2893 				/*
   2894 				 * t_lwp set by interrupt threads and not
   2895 				 * cleared.
   2896 				 */
   2897 				t->t_lwp = NULL;
   2898 				/*
   2899 				 * Pause and idle threads always have
   2900 				 * t_state set to TS_ONPROC.
   2901 				 */
   2902 				t->t_state = TS_FREE;
   2903 				t->t_prev = NULL;	/* Just in case */
   2904 			}
   2905 
   2906 		} while ((t = tnext) != curthread);
   2907 
   2908 		mutex_exit(&pidlock);
   2909 
   2910 		mutex_sync();
   2911 		for (t = tlist; t != NULL; t = tnext) {
   2912 			tnext = t->t_next;
   2913 			thread_free(t);
   2914 		}
   2915 	}
   2916 }
   2917 
   2918 /*
   2919  * Update the cpu_supp_freqs of this cpu. This information is returned
   2920  * as part of cpu_info kstats. If the cpu_info_kstat exists already, then
   2921  * maintain the kstat data size.
   2922  */
   2923 void
   2924 cpu_set_supp_freqs(cpu_t *cp, const char *freqs)
   2925 {
   2926 	char clkstr[sizeof ("18446744073709551615") + 1]; /* ui64 MAX */
   2927 	const char *lfreqs = clkstr;
   2928 	boolean_t kstat_exists = B_FALSE;
   2929 	kstat_t *ksp;
   2930 	size_t len;
   2931 
   2932 	/*
   2933 	 * A NULL pointer means we only support one speed.
   2934 	 */
   2935 	if (freqs == NULL)
   2936 		(void) snprintf(clkstr, sizeof (clkstr), "%"PRIu64,
   2937 		    cp->cpu_curr_clock);
   2938 	else
   2939 		lfreqs = freqs;
   2940 
   2941 	/*
   2942 	 * Make sure the frequency doesn't change while a snapshot is
   2943 	 * going on. Of course, we only need to worry about this if
   2944 	 * the kstat exists.
   2945 	 */
   2946 	if ((ksp = cp->cpu_info_kstat) != NULL) {
   2947 		mutex_enter(ksp->ks_lock);
   2948 		kstat_exists = B_TRUE;
   2949 	}
   2950 
   2951 	/*
   2952 	 * Free any previously allocated string and if the kstat
   2953 	 * already exists, then update its data size.
   2954 	 */
   2955 	if (cp->cpu_supp_freqs != NULL) {
   2956 		len = strlen(cp->cpu_supp_freqs) + 1;
   2957 		kmem_free(cp->cpu_supp_freqs, len);
   2958 		if (kstat_exists)
   2959 			ksp->ks_data_size -= len;
   2960 	}
   2961 
   2962 	/*
   2963 	 * Allocate the new string and set the pointer.
   2964 	 */
   2965 	len = strlen(lfreqs) + 1;
   2966 	cp->cpu_supp_freqs = kmem_alloc(len, KM_SLEEP);
   2967 	(void) strcpy(cp->cpu_supp_freqs, lfreqs);
   2968 
   2969 	/*
   2970 	 * If the kstat already exists then update the data size and
   2971 	 * free the lock.
   2972 	 */
   2973 	if (kstat_exists) {
   2974 		ksp->ks_data_size += len;
   2975 		mutex_exit(ksp->ks_lock);
   2976 	}
   2977 }
   2978 
   2979 /*
   2980  * Indicate the current CPU's clock freqency (in Hz).
   2981  * The calling context must be such that CPU references are safe.
   2982  */
   2983 void
   2984 cpu_set_curr_clock(uint64_t new_clk)
   2985 {
   2986 	uint64_t old_clk;
   2987 
   2988 	old_clk = CPU->cpu_curr_clock;
   2989 	CPU->cpu_curr_clock = new_clk;
   2990 
   2991 	/*
   2992 	 * The cpu-change-speed DTrace probe exports the frequency in Hz
   2993 	 */
   2994 	DTRACE_PROBE3(cpu__change__speed, processorid_t, CPU->cpu_id,
   2995 	    uint64_t, old_clk, uint64_t, new_clk);
   2996 }
   2997 
   2998 /*
   2999  * processor_info(2) and p_online(2) status support functions
 *   The constants returned by cpu_get_state() and the strings returned by
 *   cpu_get_state_str() are for use in communicating processor state
 *   information to userland.  Kernel subsystems should only be using the
 *   cpu_flags value directly.  Subsystems modifying cpu_flags should record
 *   the state change via a call to cpu_set_state().
   3005  */
   3006 
   3007 /*
   3008  * Update the pi_state of this CPU.  This function provides the CPU status for
   3009  * the information returned by processor_info(2).
   3010  */
   3011 void
   3012 cpu_set_state(cpu_t *cpu)
   3013 {
   3014 	ASSERT(MUTEX_HELD(&cpu_lock));
   3015 	cpu->cpu_type_info.pi_state = cpu_get_state(cpu);
   3016 	cpu->cpu_state_begin = gethrestime_sec();
   3017 	pool_cpu_mod = gethrtime();
   3018 }
   3019 
   3020 /*
   3021  * Return offline/online/other status for the indicated CPU.  Use only for
   3022  * communication with user applications; cpu_flags provides the in-kernel
   3023  * interface.
   3024  */
   3025 int
   3026 cpu_get_state(cpu_t *cpu)
   3027 {
   3028 	ASSERT(MUTEX_HELD(&cpu_lock));
   3029 	if (cpu->cpu_flags & CPU_POWEROFF)
   3030 		return (P_POWEROFF);
   3031 	else if (cpu->cpu_flags & CPU_FAULTED)
   3032 		return (P_FAULTED);
   3033 	else if (cpu->cpu_flags & CPU_SPARE)
   3034 		return (P_SPARE);
   3035 	else if ((cpu->cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY)
   3036 		return (P_OFFLINE);
   3037 	else if (cpu->cpu_flags & CPU_ENABLE)
   3038 		return (P_ONLINE);
   3039 	else
   3040 		return (P_NOINTR);
   3041 }
   3042 
   3043 /*
   3044  * Return processor_info(2) state as a string.
   3045  */
   3046 const char *
   3047 cpu_get_state_str(cpu_t *cpu)
   3048 {
   3049 	const char *string;
   3050 
   3051 	switch (cpu_get_state(cpu)) {
   3052 	case P_ONLINE:
   3053 		string = PS_ONLINE;
   3054 		break;
   3055 	case P_POWEROFF:
   3056 		string = PS_POWEROFF;
   3057 		break;
   3058 	case P_NOINTR:
   3059 		string = PS_NOINTR;
   3060 		break;
   3061 	case P_SPARE:
   3062 		string = PS_SPARE;
   3063 		break;
   3064 	case P_FAULTED:
   3065 		string = PS_FAULTED;
   3066 		break;
   3067 	case P_OFFLINE:
   3068 		string = PS_OFFLINE;
   3069 		break;
   3070 	default:
   3071 		string = "unknown";
   3072 		break;
   3073 	}
   3074 	return (string);
   3075 }
   3076 
   3077 /*
   3078  * Export this CPU's statistics (cpu_stat_t and cpu_stats_t) as raw and named
   3079  * kstats, respectively.  This is done when a CPU is initialized or placed
   3080  * online via p_online(2).
   3081  */
   3082 static void
   3083 cpu_stats_kstat_create(cpu_t *cp)
   3084 {
   3085 	int 	instance = cp->cpu_id;
   3086 	char 	*module = "cpu";
   3087 	char 	*class = "misc";
   3088 	kstat_t	*ksp;
   3089 	zoneid_t zoneid;
   3090 
   3091 	ASSERT(MUTEX_HELD(&cpu_lock));
   3092 
   3093 	if (pool_pset_enabled())
   3094 		zoneid = GLOBAL_ZONEID;
   3095 	else
   3096 		zoneid = ALL_ZONES;
   3097 	/*
   3098 	 * Create named kstats
   3099 	 */
   3100 #define	CPU_STATS_KS_CREATE(name, tsize, update_func)                    \
   3101 	ksp = kstat_create_zone(module, instance, (name), class,         \
   3102 	    KSTAT_TYPE_NAMED, (tsize) / sizeof (kstat_named_t), 0,       \
   3103 	    zoneid);                                                     \
   3104 	if (ksp != NULL) {                                               \
   3105 		ksp->ks_private = cp;                                    \
   3106 		ksp->ks_update = (update_func);                          \
   3107 		kstat_install(ksp);                                      \
   3108 	} else                                                           \
   3109 		cmn_err(CE_WARN, "cpu: unable to create %s:%d:%s kstat", \
   3110 		    module, instance, (name));
   3111 
   3112 	CPU_STATS_KS_CREATE("sys", sizeof (cpu_sys_stats_ks_data_template),
   3113 	    cpu_sys_stats_ks_update);
   3114 	CPU_STATS_KS_CREATE("vm", sizeof (cpu_vm_stats_ks_data_template),
   3115 	    cpu_vm_stats_ks_update);
   3116 
   3117 	/*
   3118 	 * Export the familiar cpu_stat_t KSTAT_TYPE_RAW kstat.
   3119 	 */
   3120 	ksp = kstat_create_zone("cpu_stat", cp->cpu_id, NULL,
   3121 	    "misc", KSTAT_TYPE_RAW, sizeof (cpu_stat_t), 0, zoneid);
   3122 	if (ksp != NULL) {
   3123 		ksp->ks_update = cpu_stat_ks_update;
   3124 		ksp->ks_private = cp;
   3125 		kstat_install(ksp);
   3126 	}
   3127 }
   3128 
   3129 static void
   3130 cpu_stats_kstat_destroy(cpu_t *cp)
   3131 {
   3132 	char ks_name[KSTAT_STRLEN];
   3133 
   3134 	(void) sprintf(ks_name, "cpu_stat%d", cp->cpu_id);
   3135 	kstat_delete_byname("cpu_stat", cp->cpu_id, ks_name);
   3136 
   3137 	kstat_delete_byname("cpu", cp->cpu_id, "sys");
   3138 	kstat_delete_byname("cpu", cp->cpu_id, "vm");
   3139 }
   3140 
   3141 static int
   3142 cpu_sys_stats_ks_update(kstat_t *ksp, int rw)
   3143 {
   3144 	cpu_t *cp = (cpu_t *)ksp->ks_private;
   3145 	struct cpu_sys_stats_ks_data *csskd;
   3146 	cpu_sys_stats_t *css;
   3147 	hrtime_t msnsecs[NCMSTATES];
   3148 	int	i;
   3149 
   3150 	if (rw == KSTAT_WRITE)
   3151 		return (EACCES);
   3152 
   3153 	csskd = ksp->ks_data;
   3154 	css = &cp->cpu_stats.sys;
   3155 
   3156 	/*
   3157 	 * Read CPU mstate, but compare with the last values we
   3158 	 * received to make sure that the returned kstats never
   3159 	 * decrease.
   3160 	 */
   3161 
   3162 	get_cpu_mstate(cp, msnsecs);
   3163 	if (csskd->cpu_nsec_idle.value.ui64 > msnsecs[CMS_IDLE])
   3164 		msnsecs[CMS_IDLE] = csskd->cpu_nsec_idle.value.ui64;
   3165 	if (csskd->cpu_nsec_user.value.ui64 > msnsecs[CMS_USER])
   3166 		msnsecs[CMS_USER] = csskd->cpu_nsec_user.value.ui64;
   3167 	if (csskd->cpu_nsec_kernel.value.ui64 > msnsecs[CMS_SYSTEM])
   3168 		msnsecs[CMS_SYSTEM] = csskd->cpu_nsec_kernel.value.ui64;
   3169 
   3170 	bcopy(&cpu_sys_stats_ks_data_template, ksp->ks_data,
   3171 	    sizeof (cpu_sys_stats_ks_data_template));
   3172 
   3173 	csskd->cpu_ticks_wait.value.ui64 = 0;
   3174 	csskd->wait_ticks_io.value.ui64 = 0;
   3175 
   3176 	csskd->cpu_nsec_idle.value.ui64 = msnsecs[CMS_IDLE];
   3177 	csskd->cpu_nsec_user.value.ui64 = msnsecs[CMS_USER];
   3178 	csskd->cpu_nsec_kernel.value.ui64 = msnsecs[CMS_SYSTEM];
   3179 	csskd->cpu_ticks_idle.value.ui64 =
   3180 	    NSEC_TO_TICK(csskd->cpu_nsec_idle.value.ui64);
   3181 	csskd->cpu_ticks_user.value.ui64 =
   3182 	    NSEC_TO_TICK(csskd->cpu_nsec_user.value.ui64);
   3183 	csskd->cpu_ticks_kernel.value.ui64 =
   3184 	    NSEC_TO_TICK(csskd->cpu_nsec_kernel.value.ui64);
   3185 	csskd->cpu_nsec_intr.value.ui64 = cp->cpu_intrlast;
   3186 	csskd->cpu_load_intr.value.ui64 = cp->cpu_intrload;
   3187 	csskd->bread.value.ui64 = css->bread;
   3188 	csskd->bwrite.value.ui64 = css->bwrite;
   3189 	csskd->lread.value.ui64 = css->lread;
   3190 	csskd->lwrite.value.ui64 = css->lwrite;
   3191 	csskd->phread.value.ui64 = css->phread;
   3192 	csskd->phwrite.value.ui64 = css->phwrite;
   3193 	csskd->pswitch.value.ui64 = css->pswitch;
   3194 	csskd->trap.value.ui64 = css->trap;
   3195 	csskd->intr.value.ui64 = 0;
   3196 	for (i = 0; i < PIL_MAX; i++)
   3197 		csskd->intr.value.ui64 += css->intr[i];
   3198 	csskd->syscall.value.ui64 = css->syscall;
   3199 	csskd->sysread.value.ui64 = css->sysread;
   3200 	csskd->syswrite.value.ui64 = css->syswrite;
   3201 	csskd->sysfork.value.ui64 = css->sysfork;
   3202 	csskd->sysvfork.value.ui64 = css->sysvfork;
   3203 	csskd->sysexec.value.ui64 = css->sysexec;
   3204 	csskd->readch.value.ui64 = css->readch;
   3205 	csskd->writech.value.ui64 = css->writech;
   3206 	csskd->rcvint.value.ui64 = css->rcvint;
   3207 	csskd->xmtint.value.ui64 = css->xmtint;
   3208 	csskd->mdmint.value.ui64 = css->mdmint;
   3209 	csskd->rawch.value.ui64 = css->rawch;
   3210 	csskd->canch.value.ui64 = css->canch;
   3211 	csskd->outch.value.ui64 = css->outch;
   3212 	csskd->msg.value.ui64 = css->msg;
   3213 	csskd->sema.value.ui64 = css->sema;
   3214 	csskd->namei.value.ui64 = css->namei;
   3215 	csskd->ufsiget.value.ui64 = css->ufsiget;
   3216 	csskd->ufsdirblk.value.ui64 = css->ufsdirblk;
   3217 	csskd->ufsipage.value.ui64 = css->ufsipage;
   3218 	csskd->ufsinopage.value.ui64 = css->ufsinopage;
   3219 	csskd->procovf.value.ui64 = css->procovf;
   3220 	csskd->intrthread.value.ui64 = 0;
   3221 	for (i = 0; i < LOCK_LEVEL - 1; i++)
   3222 		csskd->intrthread.value.ui64 += css->intr[i];
   3223 	csskd->intrblk.value.ui64 = css->intrblk;
   3224 	csskd->intrunpin.value.ui64 = css->intrunpin;
   3225 	csskd->idlethread.value.ui64 = css->idlethread;
   3226 	csskd->inv_swtch.value.ui64 = css->inv_swtch;
   3227 	csskd->nthreads.value.ui64 = css->nthreads;
   3228 	csskd->cpumigrate.value.ui64 = css->cpumigrate;
   3229 	csskd->xcalls.value.ui64 = css->xcalls;
   3230 	csskd->mutex_adenters.value.ui64 = css->mutex_adenters;
   3231 	csskd->rw_rdfails.value.ui64 = css->rw_rdfails;
   3232 	csskd->rw_wrfails.value.ui64 = css->rw_wrfails;
   3233 	csskd->modload.value.ui64 = css->modload;
   3234 	csskd->modunload.value.ui64 = css->modunload;
   3235 	csskd->bawrite.value.ui64 = css->bawrite;
   3236 	csskd->iowait.value.ui64 = css->iowait;
   3237 
   3238 	return (0);
   3239 }
   3240 
   3241 static int
   3242 cpu_vm_stats_ks_update(kstat_t *ksp, int rw)
   3243 {
   3244 	cpu_t *cp = (cpu_t *)ksp->ks_private;
   3245 	struct cpu_vm_stats_ks_data *cvskd;
   3246 	cpu_vm_stats_t *cvs;
   3247 
   3248 	if (rw == KSTAT_WRITE)
   3249 		return (EACCES);
   3250 
   3251 	cvs = &cp->cpu_stats.vm;
   3252 	cvskd = ksp->ks_data;
   3253 
   3254 	bcopy(&cpu_vm_stats_ks_data_template, ksp->ks_data,
   3255 	    sizeof (cpu_vm_stats_ks_data_template));
   3256 	cvskd->pgrec.value.ui64 = cvs->pgrec;
   3257 	cvskd->pgfrec.value.ui64 = cvs->pgfrec;
   3258 	cvskd->pgin.value.ui64 = cvs->pgin;
   3259 	cvskd->pgpgin.value.ui64 = cvs->pgpgin;
   3260 	cvskd->pgout.value.ui64 = cvs->pgout;
   3261 	cvskd->pgpgout.value.ui64 = cvs->pgpgout;
   3262 	cvskd->swapin.value.ui64 = cvs->swapin;
   3263 	cvskd->pgswapin.value.ui64 = cvs->pgswapin;
   3264 	cvskd->swapout.value.ui64 = cvs->swapout;
   3265 	cvskd->pgswapout.value.ui64 = cvs->pgswapout;
   3266 	cvskd->zfod.value.ui64 = cvs->zfod;
   3267 	cvskd->dfree.value.ui64 = cvs->dfree;
   3268 	cvskd->scan.value.ui64 = cvs->scan;
   3269 	cvskd->rev.value.ui64 = cvs->rev;
   3270 	cvskd->hat_fault.value.ui64 = cvs->hat_fault;
   3271 	cvskd->as_fault.value.ui64 = cvs->as_fault;
   3272 	cvskd->maj_fault.value.ui64 = cvs->maj_fault;
   3273 	cvskd->cow_fault.value.ui64 = cvs->cow_fault;
   3274 	cvskd->prot_fault.value.ui64 = cvs->prot_fault;
   3275 	cvskd->softlock.value.ui64 = cvs->softlock;
   3276 	cvskd->kernel_asflt.value.ui64 = cvs->kernel_asflt;
   3277 	cvskd->pgrrun.value.ui64 = cvs->pgrrun;
   3278 	cvskd->execpgin.value.ui64 = cvs->execpgin;
   3279 	cvskd->execpgout.value.ui64 = cvs->execpgout;
   3280 	cvskd->execfree.value.ui64 = cvs->execfree;
   3281 	cvskd->anonpgin.value.ui64 = cvs->anonpgin;
   3282 	cvskd->anonpgout.value.ui64 = cvs->anonpgout;
   3283 	cvskd->anonfree.value.ui64 = cvs->anonfree;
   3284 	cvskd->fspgin.value.ui64 = cvs->fspgin;
   3285 	cvskd->fspgout.value.ui64 = cvs->fspgout;
   3286 	cvskd->fsfree.value.ui64 = cvs->fsfree;
   3287 
   3288 	return (0);
   3289 }
   3290 
   3291 static int
   3292 cpu_stat_ks_update(kstat_t *ksp, int rw)
   3293 {
   3294 	cpu_stat_t *cso;
   3295 	cpu_t *cp;
   3296 	int i;
   3297 	hrtime_t msnsecs[NCMSTATES];
   3298 
   3299 	cso = (cpu_stat_t *)ksp->ks_data;
   3300 	cp = (cpu_t *)ksp->ks_private;
   3301 
   3302 	if (rw == KSTAT_WRITE)
   3303 		return (EACCES);
   3304 
   3305 	/*
   3306 	 * Read CPU mstate, but compare with the last values we
   3307 	 * received to make sure that the returned kstats never
   3308 	 * decrease.
   3309 	 */
   3310 
   3311 	get_cpu_mstate(cp, msnsecs);
   3312 	msnsecs[CMS_IDLE] = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
   3313 	msnsecs[CMS_USER] = NSEC_TO_TICK(msnsecs[CMS_USER]);
   3314 	msnsecs[CMS_SYSTEM] = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
   3315 	if (cso->cpu_sysinfo.cpu[CPU_IDLE] < msnsecs[CMS_IDLE])
   3316 		cso->cpu_sysinfo.cpu[CPU_IDLE] = msnsecs[CMS_IDLE];
   3317 	if (cso->cpu_sysinfo.cpu[CPU_USER] < msnsecs[CMS_USER])
   3318 		cso->cpu_sysinfo.cpu[CPU_USER] = msnsecs[CMS_USER];
   3319 	if (cso->cpu_sysinfo.cpu[CPU_KERNEL] < msnsecs[CMS_SYSTEM])
   3320 		cso->cpu_sysinfo.cpu[CPU_KERNEL] = msnsecs[CMS_SYSTEM];
   3321 	cso->cpu_sysinfo.cpu[CPU_WAIT] 	= 0;
   3322 	cso->cpu_sysinfo.wait[W_IO] 	= 0;
   3323 	cso->cpu_sysinfo.wait[W_SWAP]	= 0;
   3324 	cso->cpu_sysinfo.wait[W_PIO]	= 0;
   3325 	cso->cpu_sysinfo.bread 		= CPU_STATS(cp, sys.bread);
   3326 	cso->cpu_sysinfo.bwrite 	= CPU_STATS(cp, sys.bwrite);
   3327 	cso->cpu_sysinfo.lread 		= CPU_STATS(cp, sys.lread);
   3328 	cso->cpu_sysinfo.lwrite 	= CPU_STATS(cp, sys.lwrite);
   3329 	cso->cpu_sysinfo.phread 	= CPU_STATS(cp, sys.phread);
   3330 	cso->cpu_sysinfo.phwrite 	= CPU_STATS(cp, sys.phwrite);
   3331 	cso->cpu_sysinfo.pswitch 	= CPU_STATS(cp, sys.pswitch);
   3332 	cso->cpu_sysinfo.trap 		= CPU_STATS(cp, sys.trap);
   3333 	cso->cpu_sysinfo.intr		= 0;
   3334 	for (i = 0; i < PIL_MAX; i++)
   3335 		cso->cpu_sysinfo.intr += CPU_STATS(cp, sys.intr[i]);
   3336 	cso->cpu_sysinfo.syscall	= CPU_STATS(cp, sys.syscall);
   3337 	cso->cpu_sysinfo.sysread	= CPU_STATS(cp, sys.sysread);
   3338 	cso->cpu_sysinfo.syswrite	= CPU_STATS(cp, sys.syswrite);
   3339 	cso->cpu_sysinfo.sysfork	= CPU_STATS(cp, sys.sysfork);
   3340 	cso->cpu_sysinfo.sysvfork	= CPU_STATS(cp, sys.sysvfork);
   3341 	cso->cpu_sysinfo.sysexec	= CPU_STATS(cp, sys.sysexec);
   3342 	cso->cpu_sysinfo.readch		= CPU_STATS(cp, sys.readch);
   3343 	cso->cpu_sysinfo.writech	= CPU_STATS(cp, sys.writech);
   3344 	cso->cpu_sysinfo.rcvint		= CPU_STATS(cp, sys.rcvint);
   3345 	cso->cpu_sysinfo.xmtint		= CPU_STATS(cp, sys.xmtint);
   3346 	cso->cpu_sysinfo.mdmint		= CPU_STATS(cp, sys.mdmint);
   3347 	cso->cpu_sysinfo.rawch		= CPU_STATS(cp, sys.rawch);
   3348 	cso->cpu_sysinfo.canch		= CPU_STATS(cp, sys.canch);
   3349 	cso->cpu_sysinfo.outch		= CPU_STATS(cp, sys.outch);
   3350 	cso->cpu_sysinfo.msg		= CPU_STATS(cp, sys.msg);
   3351 	cso->cpu_sysinfo.sema		= CPU_STATS(cp, sys.sema);
   3352 	cso->cpu_sysinfo.namei		= CPU_STATS(cp, sys.namei);
   3353 	cso->cpu_sysinfo.ufsiget	= CPU_STATS(cp, sys.ufsiget);
   3354 	cso->cpu_sysinfo.ufsdirblk	= CPU_STATS(cp, sys.ufsdirblk);
   3355 	cso->cpu_sysinfo.ufsipage	= CPU_STATS(cp, sys.ufsipage);
   3356 	cso->cpu_sysinfo.ufsinopage	= CPU_STATS(cp, sys.ufsinopage);
   3357 	cso->cpu_sysinfo.inodeovf	= 0;
   3358 	cso->cpu_sysinfo.fileovf	= 0;
   3359 	cso->cpu_sysinfo.procovf	= CPU_STATS(cp, sys.procovf);
   3360 	cso->cpu_sysinfo.intrthread	= 0;
   3361 	for (i = 0; i < LOCK_LEVEL - 1; i++)
   3362 		cso->cpu_sysinfo.intrthread += CPU_STATS(cp, sys.intr[i]);
   3363 	cso->cpu_sysinfo.intrblk	= CPU_STATS(cp, sys.intrblk);
   3364 	cso->cpu_sysinfo.idlethread	= CPU_STATS(cp, sys.idlethread);
   3365 	cso->cpu_sysinfo.inv_swtch	= CPU_STATS(cp, sys.inv_swtch);
   3366 	cso->cpu_sysinfo.nthreads	= CPU_STATS(cp, sys.nthreads);
   3367 	cso->cpu_sysinfo.cpumigrate	= CPU_STATS(cp, sys.cpumigrate);
   3368 	cso->cpu_sysinfo.xcalls		= CPU_STATS(cp, sys.xcalls);
   3369 	cso->cpu_sysinfo.mutex_adenters	= CPU_STATS(cp, sys.mutex_adenters);
   3370 	cso->cpu_sysinfo.rw_rdfails	= CPU_STATS(cp, sys.rw_rdfails);
   3371 	cso->cpu_sysinfo.rw_wrfails	= CPU_STATS(cp, sys.rw_wrfails);
   3372 	cso->cpu_sysinfo.modload	= CPU_STATS(cp, sys.modload);
   3373 	cso->cpu_sysinfo.modunload	= CPU_STATS(cp, sys.modunload);
   3374 	cso->cpu_sysinfo.bawrite	= CPU_STATS(cp, sys.bawrite);
   3375 	cso->cpu_sysinfo.rw_enters	= 0;
   3376 	cso->cpu_sysinfo.win_uo_cnt	= 0;
   3377 	cso->cpu_sysinfo.win_uu_cnt	= 0;
   3378 	cso->cpu_sysinfo.win_so_cnt	= 0;
   3379 	cso->cpu_sysinfo.win_su_cnt	= 0;
   3380 	cso->cpu_sysinfo.win_suo_cnt	= 0;
   3381 
   3382 	cso->cpu_syswait.iowait		= CPU_STATS(cp, sys.iowait);
   3383 	cso->cpu_syswait.swap		= 0;
   3384 	cso->cpu_syswait.physio		= 0;
   3385 
   3386 	cso->cpu_vminfo.pgrec		= CPU_STATS(cp, vm.pgrec);
   3387 	cso->cpu_vminfo.pgfrec		= CPU_STATS(cp, vm.pgfrec);
   3388 	cso->cpu_vminfo.pgin		= CPU_STATS(cp, vm.pgin);
   3389 	cso->cpu_vminfo.pgpgin		= CPU_STATS(cp, vm.pgpgin);
   3390 	cso->cpu_vminfo.pgout		= CPU_STATS(cp, vm.pgout);
   3391 	cso->cpu_vminfo.pgpgout		= CPU_STATS(cp, vm.pgpgout);
   3392 	cso->cpu_vminfo.swapin		= CPU_STATS(cp, vm.swapin);
   3393 	cso->cpu_vminfo.pgswapin	= CPU_STATS(cp, vm.pgswapin);
   3394 	cso->cpu_vminfo.swapout		= CPU_STATS(cp, vm.swapout);
   3395 	cso->cpu_vminfo.pgswapout	= CPU_STATS(cp, vm.pgswapout);
   3396 	cso->cpu_vminfo.zfod		= CPU_STATS(cp, vm.zfod);
   3397 	cso->cpu_vminfo.dfree		= CPU_STATS(cp, vm.dfree);
   3398 	cso->cpu_vminfo.scan		= CPU_STATS(cp, vm.scan);
   3399 	cso->cpu_vminfo.rev		= CPU_STATS(cp, vm.rev);
   3400 	cso->cpu_vminfo.hat_fault	= CPU_STATS(cp, vm.hat_fault);
   3401 	cso->cpu_vminfo.as_fault	= CPU_STATS(cp, vm.as_fault);
   3402 	cso->cpu_vminfo.maj_fault	= CPU_STATS(cp, vm.maj_fault);
   3403 	cso->cpu_vminfo.cow_fault	= CPU_STATS(cp, vm.cow_fault);
   3404 	cso->cpu_vminfo.prot_fault	= CPU_STATS(cp, vm.prot_fault);
   3405 	cso->cpu_vminfo.softlock	= CPU_STATS(cp, vm.softlock);
   3406 	cso->cpu_vminfo.kernel_asflt	= CPU_STATS(cp, vm.kernel_asflt);
   3407 	cso->cpu_vminfo.pgrrun		= CPU_STATS(cp, vm.pgrrun);
   3408 	cso->cpu_vminfo.execpgin	= CPU_STATS(cp, vm.execpgin);
   3409 	cso->cpu_vminfo.execpgout	= CPU_STATS(cp, vm.execpgout);
   3410 	cso->cpu_vminfo.execfree	= CPU_STATS(cp, vm.execfree);
   3411 	cso->cpu_vminfo.anonpgin	= CPU_STATS(cp, vm.anonpgin);
   3412 	cso->cpu_vminfo.anonpgout	= CPU_STATS(cp, vm.anonpgout);
   3413 	cso->cpu_vminfo.anonfree	= CPU_STATS(cp, vm.anonfree);
   3414 	cso->cpu_vminfo.fspgin		= CPU_STATS(cp, vm.fspgin);
   3415 	cso->cpu_vminfo.fspgout		= CPU_STATS(cp, vm.fspgout);
   3416 	cso->cpu_vminfo.fsfree		= CPU_STATS(cp, vm.fsfree);
   3417 
   3418 	return (0);
   3419 }
   3420