Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * Architecture-independent CPU control functions.
     28  */
     29 
     30 #include <sys/types.h>
     31 #include <sys/param.h>
     32 #include <sys/var.h>
     33 #include <sys/thread.h>
     34 #include <sys/cpuvar.h>
     35 #include <sys/cpu_event.h>
     36 #include <sys/kstat.h>
     37 #include <sys/uadmin.h>
     38 #include <sys/systm.h>
     39 #include <sys/errno.h>
     40 #include <sys/cmn_err.h>
     41 #include <sys/procset.h>
     42 #include <sys/processor.h>
     43 #include <sys/debug.h>
     44 #include <sys/cpupart.h>
     45 #include <sys/lgrp.h>
     46 #include <sys/pset.h>
     47 #include <sys/pghw.h>
     48 #include <sys/kmem.h>
     49 #include <sys/kmem_impl.h>	/* to set per-cpu kmem_cache offset */
     50 #include <sys/atomic.h>
     51 #include <sys/callb.h>
     52 #include <sys/vtrace.h>
     53 #include <sys/cyclic.h>
     54 #include <sys/bitmap.h>
     55 #include <sys/nvpair.h>
     56 #include <sys/pool_pset.h>
     57 #include <sys/msacct.h>
     58 #include <sys/time.h>
     59 #include <sys/archsystm.h>
     60 #include <sys/sdt.h>
     61 #if defined(__x86) || defined(__amd64)
     62 #include <sys/x86_archext.h>
     63 #endif
     64 #include <sys/callo.h>
     65 
        /*
         * Routines declared extern here are defined outside this file.
         * The mp_cpu_*() declarations take either a cpu_t pointer or an int.
         */
     66 extern int	mp_cpu_start(cpu_t *);
     67 extern int	mp_cpu_stop(cpu_t *);
     68 extern int	mp_cpu_poweron(cpu_t *);
     69 extern int	mp_cpu_poweroff(cpu_t *);
     70 extern int	mp_cpu_configure(int);
     71 extern int	mp_cpu_unconfigure(int);
     72 extern void	mp_cpu_faulted_enter(cpu_t *);
     73 extern void	mp_cpu_faulted_exit(cpu_t *);
     74 
     75 extern int cmp_cpu_to_chip(processorid_t cpuid);
     76 #ifdef __sparcv9
        /* Only declared when building for __sparcv9. */
     77 extern char *cpu_fru_fmri(cpu_t *cp);
     78 #endif
     79 
        /*
         * Forward declarations of file-local (static) routines.
         */
     80 static void cpu_add_active_internal(cpu_t *cp);
     81 static void cpu_remove_active(cpu_t *cp);
     82 static void cpu_info_kstat_create(cpu_t *cp);
     83 static void cpu_info_kstat_destroy(cpu_t *cp);
     84 static void cpu_stats_kstat_create(cpu_t *cp);
     85 static void cpu_stats_kstat_destroy(cpu_t *cp);
     86 
        /* kstat update callbacks; each takes the kstat and an rw flag. */
     87 static int cpu_sys_stats_ks_update(kstat_t *ksp, int rw);
     88 static int cpu_vm_stats_ks_update(kstat_t *ksp, int rw);
     89 static int cpu_stat_ks_update(kstat_t *ksp, int rw);
     90 static int cpu_state_change_hooks(int, cpu_setup_t, cpu_setup_t);
     91 
     92 /*
     93  * cpu_lock protects ncpus, ncpus_online, cpu_flag, cpu_list, cpu_active,
     94  * and dispatch queue reallocations.  The lock ordering with respect to
     95  * related locks is:
     96  *
     97  *	cpu_lock --> thread_free_lock  --->  p_lock  --->  thread_lock()
     98  *
     99  * Warning:  Certain sections of code do not use the cpu_lock when
    100  * traversing the cpu_list (e.g. mutex_vector_enter(), clock()).  Since
    101  * all cpus are paused during modifications to this list, a solution
    102  * to protect the list is to either disable kernel preemption while
    103  * walking the list, *or* recheck the cpu_next pointer at each
    104  * iteration in the loop.  Note that in no cases can any cached
    105  * copies of the cpu pointers be kept as they may become invalid.
    106  */
    107 kmutex_t	cpu_lock;		/* see block comment above */
    108 cpu_t		*cpu_list;		/* list of all CPUs */
    109 cpu_t		*clock_cpu_list;	/* used by clock to walk CPUs */
    110 cpu_t		*cpu_active;		/* list of active CPUs */
    111 static cpuset_t	cpu_available;		/* set of available CPUs */
    112 cpuset_t	cpu_seqid_inuse;	/* which cpu_seqids are in use */
    113 
    114 cpu_t		**cpu_seq;		/* ptrs to CPUs, indexed by seq_id */
    115 
    116 /*
    117  * max_ncpus keeps the max cpus the system can have. Initially
    118  * it's NCPU, but since most archs scan the devtree for cpus
    119  * fairly early on during boot, the real max can be known before
    120  * ncpus is set (useful for early NCPU based allocations).
    121  */
    122 int max_ncpus = NCPU;
    123 /*
    124  * platforms that set max_ncpus to maximum number of cpus that can be
    125  * dynamically added will set boot_max_ncpus to the number of cpus found
    126  * at device tree scan time during boot.
    127  */
    128 int boot_max_ncpus = -1;
        /* Starts at -1 like boot_max_ncpus; not assigned in this part of file. */
    129 int boot_ncpus = -1;
    130 /*
    131  * Maximum possible CPU id.  This can never be >= NCPU since NCPU is
    132  * used to size arrays that are indexed by CPU id.
    133  */
    134 processorid_t max_cpuid = NCPU - 1;
    135 
        /*
         * CPU counts; both start at 1.  Per the cpu_lock comment above,
         * ncpus and ncpus_online are protected by cpu_lock.
         */
    136 int ncpus = 1;
    137 int ncpus_online = 1;
    138 
    139 /*
    140  * CPU that we're trying to offline.  Protected by cpu_lock.
    141  */
    142 cpu_t *cpu_inmotion;
    143 
    144 /*
    145  * Can be raised to suppress further weakbinding, which are instead
    146  * satisfied by disabling preemption.  Must be raised/lowered under cpu_lock,
    147  * while individual thread weakbinding synchronization is done under thread
    148  * lock.
    149  */
    150 int weakbindingbarrier;
    151 
    152 /*
    153  * Variables used in pause_cpus().
    154  */
        /*
         * One pause-state byte per CPU, indexed by the index handed to
         * cpu_pause().  This file stores PAUSE_READY, PAUSE_IDLE and
         * PAUSE_DEAD into it and tests it against PAUSE_DIE.
         */
    155 static volatile char safe_list[NCPU];
    156 
    157 static struct _cpu_pause_info {
    158 	int		cp_spl;		/* spl saved in pause_cpus() */
    159 	volatile int	cp_go;		/* Go signal sent after all ready */
    160 	int		cp_count;	/* # of CPUs to pause */
    161 	ksema_t		cp_sem;		/* synch pause_cpus & cpu_pause */
        	/* asserted non-NULL, then reset to NULL, by start_cpus() */
    162 	kthread_id_t	cp_paused;
    163 } cpu_pause_info;
    164 
        /*
         * cpu_pause() takes pause_free_mutex and broadcasts pause_free_cv
         * when a pause thread marks its safe_list slot PAUSE_DEAD.
         */
    165 static kmutex_t pause_free_mutex;
    166 static kcondvar_t pause_free_cv;
    167 
        /*
         * Optional hook: when non-NULL, cpu_pause() calls it with the CPU
         * index (cast to void *) after raising spl and before
         * mach_cpu_pause().  Its return value is ignored there.
         */
    168 void *(*cpu_pause_func)(void *) = NULL;
    169 
    170 
        /*
         * Named-kstat layout for the per-CPU "sys" statistics.
         * cpu_sys_stats_ks_data_template supplies the name and data type of
         * every entry; all of them are KSTAT_DATA_UINT64, and the initializers
         * are listed in the same order as the structure members.
         * NOTE(review): presumably consumed by cpu_sys_stats_ks_update(), whose
         * definition is not visible in this part of the file -- confirm.
         */
    171 static struct cpu_sys_stats_ks_data {
    172 	kstat_named_t cpu_ticks_idle;
    173 	kstat_named_t cpu_ticks_user;
    174 	kstat_named_t cpu_ticks_kernel;
    175 	kstat_named_t cpu_ticks_wait;
    176 	kstat_named_t cpu_nsec_idle;
    177 	kstat_named_t cpu_nsec_user;
    178 	kstat_named_t cpu_nsec_kernel;
    179 	kstat_named_t cpu_nsec_intr;
    180 	kstat_named_t cpu_load_intr;
    181 	kstat_named_t wait_ticks_io;
    182 	kstat_named_t bread;
    183 	kstat_named_t bwrite;
    184 	kstat_named_t lread;
    185 	kstat_named_t lwrite;
    186 	kstat_named_t phread;
    187 	kstat_named_t phwrite;
    188 	kstat_named_t pswitch;
    189 	kstat_named_t trap;
    190 	kstat_named_t intr;
    191 	kstat_named_t syscall;
    192 	kstat_named_t sysread;
    193 	kstat_named_t syswrite;
    194 	kstat_named_t sysfork;
    195 	kstat_named_t sysvfork;
    196 	kstat_named_t sysexec;
    197 	kstat_named_t readch;
    198 	kstat_named_t writech;
    199 	kstat_named_t rcvint;
    200 	kstat_named_t xmtint;
    201 	kstat_named_t mdmint;
    202 	kstat_named_t rawch;
    203 	kstat_named_t canch;
    204 	kstat_named_t outch;
    205 	kstat_named_t msg;
    206 	kstat_named_t sema;
    207 	kstat_named_t namei;
    208 	kstat_named_t ufsiget;
    209 	kstat_named_t ufsdirblk;
    210 	kstat_named_t ufsipage;
    211 	kstat_named_t ufsinopage;
    212 	kstat_named_t procovf;
    213 	kstat_named_t intrthread;
    214 	kstat_named_t intrblk;
    215 	kstat_named_t intrunpin;
    216 	kstat_named_t idlethread;
    217 	kstat_named_t inv_swtch;
    218 	kstat_named_t nthreads;
    219 	kstat_named_t cpumigrate;
    220 	kstat_named_t xcalls;
    221 	kstat_named_t mutex_adenters;
    222 	kstat_named_t rw_rdfails;
    223 	kstat_named_t rw_wrfails;
    224 	kstat_named_t modload;
    225 	kstat_named_t modunload;
    226 	kstat_named_t bawrite;
    227 	kstat_named_t iowait;
    228 } cpu_sys_stats_ks_data_template = {
    229 	{ "cpu_ticks_idle", 	KSTAT_DATA_UINT64 },
    230 	{ "cpu_ticks_user", 	KSTAT_DATA_UINT64 },
    231 	{ "cpu_ticks_kernel", 	KSTAT_DATA_UINT64 },
    232 	{ "cpu_ticks_wait", 	KSTAT_DATA_UINT64 },
    233 	{ "cpu_nsec_idle",	KSTAT_DATA_UINT64 },
    234 	{ "cpu_nsec_user",	KSTAT_DATA_UINT64 },
    235 	{ "cpu_nsec_kernel",	KSTAT_DATA_UINT64 },
    236 	{ "cpu_nsec_intr",	KSTAT_DATA_UINT64 },
    237 	{ "cpu_load_intr",	KSTAT_DATA_UINT64 },
    238 	{ "wait_ticks_io", 	KSTAT_DATA_UINT64 },
    239 	{ "bread", 		KSTAT_DATA_UINT64 },
    240 	{ "bwrite", 		KSTAT_DATA_UINT64 },
    241 	{ "lread", 		KSTAT_DATA_UINT64 },
    242 	{ "lwrite", 		KSTAT_DATA_UINT64 },
    243 	{ "phread", 		KSTAT_DATA_UINT64 },
    244 	{ "phwrite", 		KSTAT_DATA_UINT64 },
    245 	{ "pswitch", 		KSTAT_DATA_UINT64 },
    246 	{ "trap", 		KSTAT_DATA_UINT64 },
    247 	{ "intr", 		KSTAT_DATA_UINT64 },
    248 	{ "syscall", 		KSTAT_DATA_UINT64 },
    249 	{ "sysread", 		KSTAT_DATA_UINT64 },
    250 	{ "syswrite", 		KSTAT_DATA_UINT64 },
    251 	{ "sysfork", 		KSTAT_DATA_UINT64 },
    252 	{ "sysvfork", 		KSTAT_DATA_UINT64 },
    253 	{ "sysexec", 		KSTAT_DATA_UINT64 },
    254 	{ "readch", 		KSTAT_DATA_UINT64 },
    255 	{ "writech", 		KSTAT_DATA_UINT64 },
    256 	{ "rcvint", 		KSTAT_DATA_UINT64 },
    257 	{ "xmtint", 		KSTAT_DATA_UINT64 },
    258 	{ "mdmint", 		KSTAT_DATA_UINT64 },
    259 	{ "rawch", 		KSTAT_DATA_UINT64 },
    260 	{ "canch", 		KSTAT_DATA_UINT64 },
    261 	{ "outch", 		KSTAT_DATA_UINT64 },
    262 	{ "msg", 		KSTAT_DATA_UINT64 },
    263 	{ "sema", 		KSTAT_DATA_UINT64 },
    264 	{ "namei", 		KSTAT_DATA_UINT64 },
    265 	{ "ufsiget", 		KSTAT_DATA_UINT64 },
    266 	{ "ufsdirblk", 		KSTAT_DATA_UINT64 },
    267 	{ "ufsipage", 		KSTAT_DATA_UINT64 },
    268 	{ "ufsinopage", 	KSTAT_DATA_UINT64 },
    269 	{ "procovf", 		KSTAT_DATA_UINT64 },
    270 	{ "intrthread", 	KSTAT_DATA_UINT64 },
    271 	{ "intrblk", 		KSTAT_DATA_UINT64 },
    272 	{ "intrunpin",		KSTAT_DATA_UINT64 },
    273 	{ "idlethread", 	KSTAT_DATA_UINT64 },
    274 	{ "inv_swtch", 		KSTAT_DATA_UINT64 },
    275 	{ "nthreads", 		KSTAT_DATA_UINT64 },
    276 	{ "cpumigrate", 	KSTAT_DATA_UINT64 },
    277 	{ "xcalls", 		KSTAT_DATA_UINT64 },
    278 	{ "mutex_adenters", 	KSTAT_DATA_UINT64 },
    279 	{ "rw_rdfails", 	KSTAT_DATA_UINT64 },
    280 	{ "rw_wrfails", 	KSTAT_DATA_UINT64 },
    281 	{ "modload", 		KSTAT_DATA_UINT64 },
    282 	{ "modunload", 		KSTAT_DATA_UINT64 },
    283 	{ "bawrite", 		KSTAT_DATA_UINT64 },
    284 	{ "iowait",		KSTAT_DATA_UINT64 },
    285 };
    286 
        /*
         * Named-kstat layout for the per-CPU "vm" statistics.
         * cpu_vm_stats_ks_data_template supplies the name and data type of
         * every entry; all of them are KSTAT_DATA_UINT64, and the initializers
         * are listed in the same order as the structure members.
         * NOTE(review): presumably consumed by cpu_vm_stats_ks_update(), whose
         * definition is not visible in this part of the file -- confirm.
         */
    287 static struct cpu_vm_stats_ks_data {
    288 	kstat_named_t pgrec;
    289 	kstat_named_t pgfrec;
    290 	kstat_named_t pgin;
    291 	kstat_named_t pgpgin;
    292 	kstat_named_t pgout;
    293 	kstat_named_t pgpgout;
    294 	kstat_named_t swapin;
    295 	kstat_named_t pgswapin;
    296 	kstat_named_t swapout;
    297 	kstat_named_t pgswapout;
    298 	kstat_named_t zfod;
    299 	kstat_named_t dfree;
    300 	kstat_named_t scan;
    301 	kstat_named_t rev;
    302 	kstat_named_t hat_fault;
    303 	kstat_named_t as_fault;
    304 	kstat_named_t maj_fault;
    305 	kstat_named_t cow_fault;
    306 	kstat_named_t prot_fault;
    307 	kstat_named_t softlock;
    308 	kstat_named_t kernel_asflt;
    309 	kstat_named_t pgrrun;
    310 	kstat_named_t execpgin;
    311 	kstat_named_t execpgout;
    312 	kstat_named_t execfree;
    313 	kstat_named_t anonpgin;
    314 	kstat_named_t anonpgout;
    315 	kstat_named_t anonfree;
    316 	kstat_named_t fspgin;
    317 	kstat_named_t fspgout;
    318 	kstat_named_t fsfree;
    319 } cpu_vm_stats_ks_data_template = {
    320 	{ "pgrec",		KSTAT_DATA_UINT64 },
    321 	{ "pgfrec",		KSTAT_DATA_UINT64 },
    322 	{ "pgin",		KSTAT_DATA_UINT64 },
    323 	{ "pgpgin",		KSTAT_DATA_UINT64 },
    324 	{ "pgout",		KSTAT_DATA_UINT64 },
    325 	{ "pgpgout",		KSTAT_DATA_UINT64 },
    326 	{ "swapin",		KSTAT_DATA_UINT64 },
    327 	{ "pgswapin",		KSTAT_DATA_UINT64 },
    328 	{ "swapout",		KSTAT_DATA_UINT64 },
    329 	{ "pgswapout",		KSTAT_DATA_UINT64 },
    330 	{ "zfod",		KSTAT_DATA_UINT64 },
    331 	{ "dfree",		KSTAT_DATA_UINT64 },
    332 	{ "scan",		KSTAT_DATA_UINT64 },
    333 	{ "rev",		KSTAT_DATA_UINT64 },
    334 	{ "hat_fault",		KSTAT_DATA_UINT64 },
    335 	{ "as_fault",		KSTAT_DATA_UINT64 },
    336 	{ "maj_fault",		KSTAT_DATA_UINT64 },
    337 	{ "cow_fault",		KSTAT_DATA_UINT64 },
    338 	{ "prot_fault",		KSTAT_DATA_UINT64 },
    339 	{ "softlock",		KSTAT_DATA_UINT64 },
    340 	{ "kernel_asflt",	KSTAT_DATA_UINT64 },
    341 	{ "pgrrun",		KSTAT_DATA_UINT64 },
    342 	{ "execpgin",		KSTAT_DATA_UINT64 },
    343 	{ "execpgout",		KSTAT_DATA_UINT64 },
    344 	{ "execfree",		KSTAT_DATA_UINT64 },
    345 	{ "anonpgin",		KSTAT_DATA_UINT64 },
    346 	{ "anonpgout",		KSTAT_DATA_UINT64 },
    347 	{ "anonfree",		KSTAT_DATA_UINT64 },
    348 	{ "fspgin",		KSTAT_DATA_UINT64 },
    349 	{ "fspgout",		KSTAT_DATA_UINT64 },
    350 	{ "fsfree",		KSTAT_DATA_UINT64 },
    351 };
    352 
    353 /*
    354  * Force the specified thread to migrate to the appropriate processor.
    355  * Called with thread lock held, returns with it dropped.
    356  */
    357 static void
    358 force_thread_migrate(kthread_id_t tp)
    359 {
    360 	ASSERT(THREAD_LOCK_HELD(tp));
    361 	if (tp == curthread) {
    362 		THREAD_TRANSITION(tp);
    363 		CL_SETRUN(tp);
    364 		thread_unlock_nopreempt(tp);
    365 		swtch();
    366 	} else {
    367 		if (tp->t_state == TS_ONPROC) {
    368 			cpu_surrender(tp);
    369 		} else if (tp->t_state == TS_RUN) {
    370 			(void) dispdeq(tp);
    371 			setbackdq(tp);
    372 		}
    373 		thread_unlock(tp);
    374 	}
    375 }
    376 
    377 /*
    378  * Set affinity for a specified CPU.
    379  * A reference count is incremented and the affinity is held until the
    380  * reference count is decremented to zero by thread_affinity_clear().
    381  * This is so regions of code requiring affinity can be nested.
    382  * Caller needs to ensure that cpu_id remains valid, which can be
    383  * done by holding cpu_lock across this call, unless the caller
    384  * specifies CPU_CURRENT in which case the cpu_lock will be acquired
    385  * by thread_affinity_set and CPU->cpu_id will be the target CPU.
    386  */
    387 void
    388 thread_affinity_set(kthread_id_t t, int cpu_id)
    389 {
    390 	cpu_t		*cp;
    391 	int		c;
    392 
    393 	ASSERT(!(t == curthread && t->t_weakbound_cpu != NULL));
    394 
    395 	if ((c = cpu_id) == CPU_CURRENT) {
    396 		mutex_enter(&cpu_lock);
    397 		cpu_id = CPU->cpu_id;
    398 	}
    399 	/*
    400 	 * We should be asserting that cpu_lock is held here, but
    401 	 * the NCA code doesn't acquire it.  The following assert
    402 	 * should be uncommented when the NCA code is fixed.
    403 	 *
    404 	 * ASSERT(MUTEX_HELD(&cpu_lock));
    405 	 */
    406 	ASSERT((cpu_id >= 0) && (cpu_id < NCPU));
    407 	cp = cpu[cpu_id];
    408 	ASSERT(cp != NULL);		/* user must provide a good cpu_id */
    409 	/*
    410 	 * If there is already a hard affinity requested, and this affinity
    411 	 * conflicts with that, panic.
    412 	 */
    413 	thread_lock(t);
    414 	if (t->t_affinitycnt > 0 && t->t_bound_cpu != cp) {
    415 		panic("affinity_set: setting %p but already bound to %p",
    416 		    (void *)cp, (void *)t->t_bound_cpu);
    417 	}
    418 	t->t_affinitycnt++;
    419 	t->t_bound_cpu = cp;
    420 
    421 	/*
    422 	 * Make sure we're running on the right CPU.
    423 	 */
    424 	if (cp != t->t_cpu || t != curthread) {
    425 		force_thread_migrate(t);	/* drops thread lock */
    426 	} else {
    427 		thread_unlock(t);
    428 	}
    429 
    430 	if (c == CPU_CURRENT)
    431 		mutex_exit(&cpu_lock);
    432 }
    433 
    434 /*
    435  *	Wrapper for backward compatibility.
         *	Applies thread_affinity_set() to the calling thread (curthread)
         *	with the given cpu_id.
    436  */
    437 void
    438 affinity_set(int cpu_id)
    439 {
    440 	thread_affinity_set(curthread, cpu_id);
    441 }
    442 
    443 /*
    444  * Decrement the affinity reservation count and if it becomes zero,
    445  * clear the CPU affinity for the current thread, or set it to the user's
    446  * software binding request.
    447  */
    448 void
    449 thread_affinity_clear(kthread_id_t t)
    450 {
    451 	register processorid_t binding;
    452 
    453 	thread_lock(t);
    454 	if (--t->t_affinitycnt == 0) {
    455 		if ((binding = t->t_bind_cpu) == PBIND_NONE) {
    456 			/*
    457 			 * Adjust disp_max_unbound_pri if necessary.
    458 			 */
    459 			disp_adjust_unbound_pri(t);
    460 			t->t_bound_cpu = NULL;
    461 			if (t->t_cpu->cpu_part != t->t_cpupart) {
    462 				force_thread_migrate(t);
    463 				return;
    464 			}
    465 		} else {
    466 			t->t_bound_cpu = cpu[binding];
    467 			/*
    468 			 * Make sure the thread is running on the bound CPU.
    469 			 */
    470 			if (t->t_cpu != t->t_bound_cpu) {
    471 				force_thread_migrate(t);
    472 				return;		/* already dropped lock */
    473 			}
    474 		}
    475 	}
    476 	thread_unlock(t);
    477 }
    478 
    479 /*
    480  * Wrapper for backward compatibility.
         * Applies thread_affinity_clear() to the calling thread (curthread).
    481  */
    482 void
    483 affinity_clear(void)
    484 {
    485 	thread_affinity_clear(curthread);
    486 }
    487 
    488 /*
    489  * Weak cpu affinity.  Bind to the "current" cpu for short periods
    490  * of time during which the thread must not block (but may be preempted).
    491  * Use this instead of kpreempt_disable() when it is only "no migration"
    492  * rather than "no preemption" semantics that are required - disabling
    493  * preemption holds higher priority threads off of cpu and if the
    494  * operation that is protected is more than momentary this is not good
    495  * for realtime etc.
    496  *
    497  * Weakly bound threads will not prevent a cpu from being offlined -
    498  * we'll only run them on the cpu to which they are weakly bound but
    499  * (because they do not block) we'll always be able to move them on to
    500  * another cpu at offline time if we give them just a short moment to
    501  * run during which they will unbind.  To give a cpu a chance of offlining,
    502  * however, we require a barrier to weak bindings that may be raised for a
    503  * given cpu (offline/move code may set this and then wait a short time for
    504  * existing weak bindings to drop); the cpu_inmotion pointer is that barrier.
    505  *
    506  * There are few restrictions on the calling context of thread_nomigrate.
    507  * The caller must not hold the thread lock.  Calls may be nested.
    508  *
    509  * After weakbinding a thread must not perform actions that may block.
    510  * In particular it must not call thread_affinity_set; calling that when
    511  * already weakbound is nonsensical anyway.
    512  *
    513  * If curthread is prevented from migrating for other reasons
    514  * (kernel preemption disabled; high pil; strongly bound; interrupt thread)
    515  * then the weak binding will succeed even if this cpu is the target of an
    516  * offline/move request.
    517  */
        /*
         * Request a weak binding of curthread to the cpu it is running on
         * (see the block comment above for the semantics).  On return one of
         * the following holds:
         *
         *	. nothing was changed, because we are in interrupt context, are
         *	  an interrupt thread, or are at or above DISP_LEVEL pil;
         *	. t_nomigrate was decremented (it is now negative) and kernel
         *	  preemption is left disabled in lieu of a weak binding;
         *	. t_nomigrate was incremented and t_weakbound_cpu set to the
         *	  current cpu, with preemption enabled again.
         *
         * If this cpu is the cpu_inmotion target and none of the exceptions
         * listed below apply, the thread is first forced through preemption
         * and the whole sequence is retried from the "again" label.
         */
    518 void
    519 thread_nomigrate(void)
    520 {
    521 	cpu_t *cp;
    522 	kthread_id_t t = curthread;
    523 
    524 again:
    525 	kpreempt_disable();
    526 	cp = CPU;
    527 
    528 	/*
    529 	 * A highlevel interrupt must not modify t_nomigrate or
    530 	 * t_weakbound_cpu of the thread it has interrupted.  A lowlevel
    531 	 * interrupt thread cannot migrate and we can avoid the
    532 	 * thread_lock call below by short-circuiting here.  In either
    533 	 * case we can just return since no migration is possible and
    534 	 * the condition will persist (ie, when we test for these again
    535 	 * in thread_allowmigrate they can't have changed).   Migration
    536 	 * is also impossible if we're at or above DISP_LEVEL pil.
    537 	 */
    538 	if (CPU_ON_INTR(cp) || t->t_flag & T_INTR_THREAD ||
    539 	    getpil() >= DISP_LEVEL) {
    540 		kpreempt_enable();
    541 		return;
    542 	}
    543 
    544 	/*
    545 	 * We must be consistent with existing weak bindings.  Since we
    546 	 * may be interrupted between the increment of t_nomigrate and
    547 	 * the store to t_weakbound_cpu below we cannot assume that
    548 	 * t_weakbound_cpu will be set if t_nomigrate is.  Note that we
    549 	 * cannot assert t_weakbound_cpu == t_bind_cpu since that is not
    550 	 * always the case.
    551 	 */
    552 	if (t->t_nomigrate && t->t_weakbound_cpu && t->t_weakbound_cpu != cp) {
    553 		if (!panicstr)
    554 			panic("thread_nomigrate: binding to %p but already "
    555 			    "bound to %p", (void *)cp,
    556 			    (void *)t->t_weakbound_cpu);
    557 	}
    558 
    559 	/*
    560 	 * At this point we have preemption disabled and we don't yet hold
    561 	 * the thread lock.  So it's possible that somebody else could
    562 	 * set t_bind_cpu here and not be able to force us across to the
    563 	 * new cpu (since we have preemption disabled).
    564 	 */
    565 	thread_lock(curthread);
    566 
    567 	/*
    568 	 * If further weak bindings are being (temporarily) suppressed then
    569 	 * we'll settle for disabling kernel preemption (which assures
    570 	 * no migration provided the thread does not block which it is
    571 	 * not allowed to if using thread_nomigrate).  We must remember
    572 	 * this disposition so we can take appropriate action in
    573 	 * thread_allowmigrate.  If this is a nested call and the
    574 	 * thread is already weakbound then fall through as normal.
    575 	 * We remember the decision to settle for kpreempt_disable through
    576 	 * negative nesting counting in t_nomigrate.  Once a thread has had one
    577 	 * weakbinding request satisfied in this way any further (nested)
    578 	 * requests will continue to be satisfied in the same way,
    579 	 * even if weak bindings have recommenced.
    580 	 */
        	/*
        	 * Note on precedence: && binds tighter than ||, so the test
        	 * below reads
        	 *	(t_nomigrate < 0) ||
        	 *	    (weakbindingbarrier && t_nomigrate == 0)
        	 */
    581 	if (t->t_nomigrate < 0 || weakbindingbarrier && t->t_nomigrate == 0) {
    582 		--t->t_nomigrate;
    583 		thread_unlock(curthread);
    584 		return;		/* with kpreempt_disable still active */
    585 	}
    586 
    587 	/*
    588 	 * We hold thread_lock so t_bind_cpu cannot change.  We could,
    589 	 * however, be running on a different cpu to which we are t_bound_cpu
    590 	 * to (as explained above).  If we grant the weak binding request
    591 	 * in that case then the dispatcher must favour our weak binding
    592 	 * over our strong (in which case, just as when preemption is
    593 	 * disabled, we can continue to run on a cpu other than the one to
    594 	 * which we are strongbound; the difference in this case is that
    595 	 * this thread can be preempted and so can appear on the dispatch
    596 	 * queues of a cpu other than the one it is strongbound to).
    597 	 *
    598 	 * If the cpu we are running on does not appear to be a current
    599 	 * offline target (we check cpu_inmotion to determine this - since
    600 	 * we don't hold cpu_lock we may not see a recent store to that,
    601 	 * so it's possible that we at times can grant a weak binding to a
    602 	 * cpu that is an offline target, but that one request will not
    603 	 * prevent the offline from succeeding) then we will always grant
    604 	 * the weak binding request.  This includes the case above where
    605 	 * we grant a weakbinding not commensurate with our strong binding.
    606 	 *
    607 	 * If our cpu does appear to be an offline target then we're inclined
    608 	 * not to grant the weakbinding request just yet - we'd prefer to
    609 	 * migrate to another cpu and grant the request there.  The
    610 	 * exceptions are those cases where going through preemption code
    611 	 * will not result in us changing cpu:
    612 	 *
    613 	 *	. interrupts have already bypassed this case (see above)
    614 	 *	. we are already weakbound to this cpu (dispatcher code will
    615 	 *	  always return us to the weakbound cpu)
    616 	 *	. preemption was disabled even before we disabled it above
    617 	 *	. we are strongbound to this cpu (if we're strongbound to
    618 	 *	another and not yet running there the trip through the
    619 	 *	dispatcher will move us to the strongbound cpu and we
    620 	 *	will grant the weak binding there)
    621 	 */
    622 	if (cp != cpu_inmotion || t->t_nomigrate > 0 || t->t_preempt > 1 ||
    623 	    t->t_bound_cpu == cp) {
    624 		/*
    625 		 * Don't be tempted to store to t_weakbound_cpu only on
    626 		 * the first nested bind request - if we're interrupted
    627 		 * after the increment of t_nomigrate and before the
    628 		 * store to t_weakbound_cpu and the interrupt calls
    629 		 * thread_nomigrate then the assertion in thread_allowmigrate
    630 		 * would fail.
    631 		 */
    632 		t->t_nomigrate++;
    633 		t->t_weakbound_cpu = cp;
    634 		membar_producer();
    635 		thread_unlock(curthread);
    636 		/*
    637 		 * Now that we have dropped the thread_lock another thread
    638 		 * can set our t_weakbound_cpu, and will try to migrate us
    639 		 * to the strongbound cpu (which will not be prevented by
    640 		 * preemption being disabled since we're about to enable
    641 		 * preemption).  We have granted the weakbinding to the current
    642 		 * cpu, so again we are in the position that it is possible
    643 		 * that our weak and strong bindings differ.  Again this
    644 		 * is catered for by dispatcher code which will favour our
    645 		 * weak binding.
    646 		 */
    647 		kpreempt_enable();
    648 	} else {
    649 		/*
    650 		 * Move to another cpu before granting the request by
    651 		 * forcing this thread through preemption code.  When we
    652 		 * get to set{front,back}dq called from CL_PREEMPT()
    653 		 * cpu_choose() will be used to select a cpu to queue
    654 		 * us on - that will see cpu_inmotion and take
    655 		 * steps to avoid returning us to this cpu.
    656 		 */
    657 		cp->cpu_kprunrun = 1;
    658 		thread_unlock(curthread);
    659 		kpreempt_enable();	/* will call preempt() */
    660 		goto again;
    661 	}
    662 }
    663 
    664 void
    665 thread_allowmigrate(void)
    666 {
    667 	kthread_id_t t = curthread;
    668 
    669 	ASSERT(t->t_weakbound_cpu == CPU ||
    670 	    (t->t_nomigrate < 0 && t->t_preempt > 0) ||
    671 	    CPU_ON_INTR(CPU) || t->t_flag & T_INTR_THREAD ||
    672 	    getpil() >= DISP_LEVEL);
    673 
    674 	if (CPU_ON_INTR(CPU) || (t->t_flag & T_INTR_THREAD) ||
    675 	    getpil() >= DISP_LEVEL)
    676 		return;
    677 
    678 	if (t->t_nomigrate < 0) {
    679 		/*
    680 		 * This thread was granted "weak binding" in the
    681 		 * stronger form of kernel preemption disabling.
    682 		 * Undo a level of nesting for both t_nomigrate
    683 		 * and t_preempt.
    684 		 */
    685 		++t->t_nomigrate;
    686 		kpreempt_enable();
    687 	} else if (--t->t_nomigrate == 0) {
    688 		/*
    689 		 * Time to drop the weak binding.  We need to cater
    690 		 * for the case where we're weakbound to a different
    691 		 * cpu than that to which we're strongbound (a very
    692 		 * temporary arrangement that must only persist until
    693 		 * weak binding drops).  We don't acquire thread_lock
    694 		 * here so even as this code executes t_bound_cpu
    695 		 * may be changing.  So we disable preemption and
    696 		 * a) in the case that t_bound_cpu changes while we
    697 		 * have preemption disabled kprunrun will be set
    698 		 * asynchronously, and b) if before disabling
    699 		 * preemption we were already on a different cpu to
    700 		 * our t_bound_cpu then we set kprunrun ourselves
    701 		 * to force a trip through the dispatcher when
    702 		 * preemption is enabled.
    703 		 */
    704 		kpreempt_disable();
    705 		if (t->t_bound_cpu &&
    706 		    t->t_weakbound_cpu != t->t_bound_cpu)
    707 			CPU->cpu_kprunrun = 1;
    708 		t->t_weakbound_cpu = NULL;
    709 		membar_producer();
    710 		kpreempt_enable();
    711 	}
    712 }
    713 
    714 /*
    715  * weakbinding_stop can be used to temporarily cause weakbindings made
    716  * with thread_nomigrate to be satisfied through the stronger action of
    717  * kpreempt_disable.  weakbinding_start recommences normal weakbinding.
         *
         * Both routines assert that the caller holds cpu_lock.  They work by
         * setting (stop) or clearing (start) the global weakbindingbarrier
         * flag, which thread_nomigrate tests while holding the thread lock.
    718  */
    719 
    720 void
    721 weakbinding_stop(void)
    722 {
    723 	ASSERT(MUTEX_HELD(&cpu_lock));
    724 	weakbindingbarrier = 1;
    725 	membar_producer();	/* make visible before subsequent thread_lock */
    726 }
    727 
    728 void
    729 weakbinding_start(void)
    730 {
    731 	ASSERT(MUTEX_HELD(&cpu_lock));
    732 	weakbindingbarrier = 0;
    733 }
    734 
        /*
         * Intentionally empty function: takes no arguments and does nothing.
         * NOTE(review): presumably a no-op cross-call target, judging by the
         * name; no caller is visible in this part of the file -- confirm.
         */
    735 void
    736 null_xcall(void)
    737 {
    738 }
    739 
    740 /*
    741  * This routine is called to place the CPUs in a safe place so that
    742  * one of them can be taken off line or placed on line.  What we are
    743  * trying to do here is prevent a thread from traversing the list
    744  * of active CPUs while we are changing it or from getting placed on
    745  * the run queue of a CPU that has just gone off line.  We do this by
    746  * creating a thread with the highest possible prio for each CPU and
    747  * having it call this routine.  The advantage of this method is that
    748  * we can eliminate all checks for CPU_ACTIVE in the disp routines.
    749  * This makes disp faster at the expense of making p_online() slower
    750  * which is a good trade off.
    751  */
static void
cpu_pause(int index)
{
	int s;
	struct _cpu_pause_info *cpi = &cpu_pause_info;
	/* This thread's slot in safe_list[]; all state handshakes go via it. */
	volatile char *safe = &safe_list[index];
	long    lindex = index;	/* widened so it can be cast to void * below */

	ASSERT((curthread->t_bound_cpu != NULL) || (*safe == PAUSE_DIE));

	/*
	 * Keep servicing pause requests until cpu_pause_free() marks
	 * this slot PAUSE_DIE.
	 */
	while (*safe != PAUSE_DIE) {
		*safe = PAUSE_READY;
		membar_enter();		/* make sure stores are flushed */
		sema_v(&cpi->cp_sem);	/* signal requesting thread */

		/*
		 * Wait here until all pause threads are running.  That
		 * indicates that it's safe to do the spl.  Until
		 * cpu_pause_info.cp_go is set, we don't want to spl
		 * because that might block clock interrupts needed
		 * to preempt threads on other CPUs.
		 */
		while (cpi->cp_go == 0)
			;
		/*
		 * Even though we are at the highest disp prio, we need
		 * to block out all interrupts below LOCK_LEVEL so that
		 * an intr doesn't come in, wake up a thread, and call
		 * setbackdq/setfrontdq.
		 */
		s = splhigh();
		/*
		 * if cpu_pause_func() has been set then call it using
		 * index as the argument, currently only used by
		 * cpr_suspend_cpus().  This function is used as the
		 * code to execute on the "paused" cpu's when a machine
		 * comes out of a sleep state and CPU's were powered off.
		 * (could also be used for hotplugging CPU's).
		 */
		if (cpu_pause_func != NULL)
			(*cpu_pause_func)((void *)lindex);

		/*
		 * NOTE(review): presumably this moves the slot to
		 * PAUSE_WAIT and holds the CPU until start_cpus() resets
		 * the slot -- confirm against the platform code.
		 */
		mach_cpu_pause(safe);

		splx(s);
		/*
		 * Waiting is at an end. Switch out of cpu_pause
		 * loop and resume useful work.
		 */
		swtch();
	}

	/*
	 * Told to die: report PAUSE_DEAD under pause_free_mutex and wake
	 * cpu_pause_free(), which waits on pause_free_cv for this state.
	 */
	mutex_enter(&pause_free_mutex);
	*safe = PAUSE_DEAD;
	cv_broadcast(&pause_free_cv);
	mutex_exit(&pause_free_mutex);
}
    809 
    810 /*
    811  * Allow the cpus to start running again.
    812  */
    813 void
    814 start_cpus()
    815 {
    816 	int i;
    817 
    818 	ASSERT(MUTEX_HELD(&cpu_lock));
    819 	ASSERT(cpu_pause_info.cp_paused);
    820 	cpu_pause_info.cp_paused = NULL;
    821 	for (i = 0; i < NCPU; i++)
    822 		safe_list[i] = PAUSE_IDLE;
    823 	membar_enter();			/* make sure stores are flushed */
    824 	affinity_clear();
    825 	splx(cpu_pause_info.cp_spl);
    826 	kpreempt_enable();
    827 }
    828 
    829 /*
    830  * Allocate a pause thread for a CPU.
    831  */
    832 static void
    833 cpu_pause_alloc(cpu_t *cp)
    834 {
    835 	kthread_id_t	t;
    836 	long		cpun = cp->cpu_id;
    837 
    838 	/*
    839 	 * Note, v.v_nglobpris will not change value as long as I hold
    840 	 * cpu_lock.
    841 	 */
    842 	t = thread_create(NULL, 0, cpu_pause, (void *)cpun,
    843 	    0, &p0, TS_STOPPED, v.v_nglobpris - 1);
    844 	thread_lock(t);
    845 	t->t_bound_cpu = cp;
    846 	t->t_disp_queue = cp->cpu_disp;
    847 	t->t_affinitycnt = 1;
    848 	t->t_preempt = 1;
    849 	thread_unlock(t);
    850 	cp->cpu_pause_thread = t;
    851 	/*
    852 	 * Registering a thread in the callback table is usually done
    853 	 * in the initialization code of the thread.  In this
    854 	 * case, we do it right after thread creation because the
    855 	 * thread itself may never run, and we need to register the
    856 	 * fact that it is safe for cpr suspend.
    857 	 */
    858 	CALLB_CPR_INIT_SAFE(t, "cpu_pause");
    859 }
    860 
/*
 * Free a pause thread for a CPU.
 *
 * The caller must hold cpu_lock.  Nothing is done if the CPU has no
 * pause thread.  Otherwise the thread is unbound, told to exit by
 * setting its safe_list[] slot to PAUSE_DIE, and made runnable; this
 * routine then waits until the thread reports PAUSE_DEAD before
 * resetting the slot and clearing cp->cpu_pause_thread.
 */
static void
cpu_pause_free(cpu_t *cp)
{
	kthread_id_t	t;
	int		cpun = cp->cpu_id;

	ASSERT(MUTEX_HELD(&cpu_lock));
	/*
	 * We have to get the thread and tell him to die.
	 */
	if ((t = cp->cpu_pause_thread) == NULL) {
		ASSERT(safe_list[cpun] == PAUSE_IDLE);
		return;
	}
	thread_lock(t);
	t->t_cpu = CPU;		/* disp gets upset if last cpu is quiesced. */
	t->t_bound_cpu = NULL;	/* Must un-bind; cpu may not be running. */
	t->t_pri = v.v_nglobpris - 1;
	ASSERT(safe_list[cpun] == PAUSE_IDLE);
	/* cpu_pause() tests for PAUSE_DIE and leaves its loop when it sees it. */
	safe_list[cpun] = PAUSE_DIE;
	THREAD_TRANSITION(t);
	setbackdq(t);
	thread_unlock_nopreempt(t);

	/*
	 * If we don't wait for the thread to actually die, it may try to
	 * run on the wrong cpu as part of an actual call to pause_cpus().
	 */
	mutex_enter(&pause_free_mutex);
	while (safe_list[cpun] != PAUSE_DEAD) {
		cv_wait(&pause_free_cv, &pause_free_mutex);
	}
	mutex_exit(&pause_free_mutex);
	/* The thread is gone; return the slot to its idle state. */
	safe_list[cpun] = PAUSE_IDLE;

	cp->cpu_pause_thread = NULL;
}
    901 
    902 /*
    903  * Initialize basic structures for pausing CPUs.
    904  */
    905 void
    906 cpu_pause_init()
    907 {
    908 	sema_init(&cpu_pause_info.cp_sem, 0, NULL, SEMA_DEFAULT, NULL);
    909 	/*
    910 	 * Create initial CPU pause thread.
    911 	 */
    912 	cpu_pause_alloc(CPU);
    913 }
    914 
    915 /*
    916  * Start the threads used to pause another CPU.
    917  */
    918 static int
    919 cpu_pause_start(processorid_t cpu_id)
    920 {
    921 	int	i;
    922 	int	cpu_count = 0;
    923 
    924 	for (i = 0; i < NCPU; i++) {
    925 		cpu_t		*cp;
    926 		kthread_id_t	t;
    927 
    928 		cp = cpu[i];
    929 		if (!CPU_IN_SET(cpu_available, i) || (i == cpu_id)) {
    930 			safe_list[i] = PAUSE_WAIT;
    931 			continue;
    932 		}
    933 
    934 		/*
    935 		 * Skip CPU if it is quiesced or not yet started.
    936 		 */
    937 		if ((cp->cpu_flags & (CPU_QUIESCED | CPU_READY)) != CPU_READY) {
    938 			safe_list[i] = PAUSE_WAIT;
    939 			continue;
    940 		}
    941 
    942 		/*
    943 		 * Start this CPU's pause thread.
    944 		 */
    945 		t = cp->cpu_pause_thread;
    946 		thread_lock(t);
    947 		/*
    948 		 * Reset the priority, since nglobpris may have
    949 		 * changed since the thread was created, if someone
    950 		 * has loaded the RT (or some other) scheduling
    951 		 * class.
    952 		 */
    953 		t->t_pri = v.v_nglobpris - 1;
    954 		THREAD_TRANSITION(t);
    955 		setbackdq(t);
    956 		thread_unlock_nopreempt(t);
    957 		++cpu_count;
    958 	}
    959 	return (cpu_count);
    960 }
    961 
    962 
/*
 * Pause all of the CPUs except the one we are on by running a high
 * priority pause thread bound to each of those CPUs.
 *
 * off_cp, if not NULL, is a CPU being taken offline; if the caller is
 * running on it, the caller first moves to another CPU in the same
 * partition.  The caller must hold cpu_lock and CPUs must not already
 * be paused.  On return the calling thread is at splhigh with
 * preemption disabled and is recorded in cp_paused; start_cpus()
 * undoes this.
 *
 * Note that one must be extremely careful regarding code
 * executed while CPUs are paused.  Since a CPU may be paused
 * while a thread scheduling on that CPU is holding an adaptive
 * lock, code executed with CPUs paused must not acquire adaptive
 * (or low-level spin) locks.  Also, such code must not block,
 * since the thread that is supposed to initiate the wakeup may
 * never run.
 *
 * With a few exceptions, the restrictions on code executed with CPUs
 * paused match those for code executed at high-level interrupt
 * context.
 */
void
pause_cpus(cpu_t *off_cp)
{
	processorid_t	cpu_id;
	int		i;
	struct _cpu_pause_info	*cpi = &cpu_pause_info;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpi->cp_paused == NULL);
	/* Reset the rendezvous state before any pause thread can run. */
	cpi->cp_count = 0;
	cpi->cp_go = 0;
	for (i = 0; i < NCPU; i++)
		safe_list[i] = PAUSE_IDLE;
	kpreempt_disable();

	/*
	 * If running on the cpu that is going offline, get off it.
	 * This is so that it won't be necessary to rechoose a CPU
	 * when done.
	 */
	if (CPU == off_cp)
		cpu_id = off_cp->cpu_next_part->cpu_id;
	else
		cpu_id = CPU->cpu_id;
	affinity_set(cpu_id);

	/*
	 * Start the pause threads and record how many were started
	 */
	cpi->cp_count = cpu_pause_start(cpu_id);

	/*
	 * Now wait for all CPUs to be running the pause thread.
	 * Each pause thread posts cp_sem once from cpu_pause().
	 */
	while (cpi->cp_count > 0) {
		/*
		 * Spin reading the count without grabbing the disp
		 * lock to make sure we don't prevent the pause
		 * threads from getting the lock.
		 */
		while (sema_held(&cpi->cp_sem))
			;
		if (sema_tryp(&cpi->cp_sem))
			--cpi->cp_count;
	}
	cpi->cp_go = 1;			/* all have reached cpu_pause */

	/*
	 * Now wait for all CPUs to spl. (Transition from PAUSE_READY
	 * to PAUSE_WAIT.)  Slots for CPUs that were skipped were already
	 * set to PAUSE_WAIT by cpu_pause_start().
	 */
	for (i = 0; i < NCPU; i++) {
		while (safe_list[i] != PAUSE_WAIT)
			;
	}
	cpi->cp_spl = splhigh();	/* block dispatcher on this CPU */
	cpi->cp_paused = curthread;
}
   1037 
   1038 /*
   1039  * Check whether the current thread has CPUs paused
   1040  */
   1041 int
   1042 cpus_paused(void)
   1043 {
   1044 	if (cpu_pause_info.cp_paused != NULL) {
   1045 		ASSERT(cpu_pause_info.cp_paused == curthread);
   1046 		return (1);
   1047 	}
   1048 	return (0);
   1049 }
   1050 
   1051 static cpu_t *
   1052 cpu_get_all(processorid_t cpun)
   1053 {
   1054 	ASSERT(MUTEX_HELD(&cpu_lock));
   1055 
   1056 	if (cpun >= NCPU || cpun < 0 || !CPU_IN_SET(cpu_available, cpun))
   1057 		return (NULL);
   1058 	return (cpu[cpun]);
   1059 }
   1060 
   1061 /*
   1062  * Check whether cpun is a valid processor id and whether it should be
   1063  * visible from the current zone. If it is, return a pointer to the
   1064  * associated CPU structure.
   1065  */
   1066 cpu_t *
   1067 cpu_get(processorid_t cpun)
   1068 {
   1069 	cpu_t *c;
   1070 
   1071 	ASSERT(MUTEX_HELD(&cpu_lock));
   1072 	c = cpu_get_all(cpun);
   1073 	if (c != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
   1074 	    zone_pset_get(curproc->p_zone) != cpupart_query_cpu(c))
   1075 		return (NULL);
   1076 	return (c);
   1077 }
   1078 
   1079 /*
   1080  * The following functions should be used to check CPU states in the kernel.
   1081  * They should be invoked with cpu_lock held.  Kernel subsystems interested
   1082  * in CPU states should *not* use cpu_get_state() and various P_ONLINE/etc
   1083  * states.  Those are for user-land (and system call) use only.
   1084  */
   1085 
   1086 /*
   1087  * Determine whether the CPU is online and handling interrupts.
   1088  */
   1089 int
   1090 cpu_is_online(cpu_t *cpu)
   1091 {
   1092 	ASSERT(MUTEX_HELD(&cpu_lock));
   1093 	return (cpu_flagged_online(cpu->cpu_flags));
   1094 }
   1095 
   1096 /*
   1097  * Determine whether the CPU is offline (this includes spare and faulted).
   1098  */
   1099 int
   1100 cpu_is_offline(cpu_t *cpu)
   1101 {
   1102 	ASSERT(MUTEX_HELD(&cpu_lock));
   1103 	return (cpu_flagged_offline(cpu->cpu_flags));
   1104 }
   1105 
   1106 /*
   1107  * Determine whether the CPU is powered off.
   1108  */
   1109 int
   1110 cpu_is_poweredoff(cpu_t *cpu)
   1111 {
   1112 	ASSERT(MUTEX_HELD(&cpu_lock));
   1113 	return (cpu_flagged_poweredoff(cpu->cpu_flags));
   1114 }
   1115 
   1116 /*
   1117  * Determine whether the CPU is handling interrupts.
   1118  */
   1119 int
   1120 cpu_is_nointr(cpu_t *cpu)
   1121 {
   1122 	ASSERT(MUTEX_HELD(&cpu_lock));
   1123 	return (cpu_flagged_nointr(cpu->cpu_flags));
   1124 }
   1125 
   1126 /*
   1127  * Determine whether the CPU is active (scheduling threads).
   1128  */
   1129 int
   1130 cpu_is_active(cpu_t *cpu)
   1131 {
   1132 	ASSERT(MUTEX_HELD(&cpu_lock));
   1133 	return (cpu_flagged_active(cpu->cpu_flags));
   1134 }
   1135 
   1136 /*
   1137  * Same as above, but these require cpu_flags instead of cpu_t pointers.
   1138  */
   1139 int
   1140 cpu_flagged_online(cpu_flag_t cpu_flags)
   1141 {
   1142 	return (cpu_flagged_active(cpu_flags) &&
   1143 	    (cpu_flags & CPU_ENABLE));
   1144 }
   1145 
   1146 int
   1147 cpu_flagged_offline(cpu_flag_t cpu_flags)
   1148 {
   1149 	return (((cpu_flags & CPU_POWEROFF) == 0) &&
   1150 	    ((cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY));
   1151 }
   1152 
   1153 int
   1154 cpu_flagged_poweredoff(cpu_flag_t cpu_flags)
   1155 {
   1156 	return ((cpu_flags & CPU_POWEROFF) == CPU_POWEROFF);
   1157 }
   1158 
   1159 int
   1160 cpu_flagged_nointr(cpu_flag_t cpu_flags)
   1161 {
   1162 	return (cpu_flagged_active(cpu_flags) &&
   1163 	    (cpu_flags & CPU_ENABLE) == 0);
   1164 }
   1165 
   1166 int
   1167 cpu_flagged_active(cpu_flag_t cpu_flags)
   1168 {
   1169 	return (((cpu_flags & (CPU_POWEROFF | CPU_FAULTED | CPU_SPARE)) == 0) &&
   1170 	    ((cpu_flags & (CPU_READY | CPU_OFFLINE)) == CPU_READY));
   1171 }
   1172 
   1173 /*
   1174  * Bring the indicated CPU online.
   1175  */
   1176 int
   1177 cpu_online(cpu_t *cp)
   1178 {
   1179 	int	error = 0;
   1180 
   1181 	/*
   1182 	 * Handle on-line request.
   1183 	 *	This code must put the new CPU on the active list before
   1184 	 *	starting it because it will not be paused, and will start
   1185 	 * 	using the active list immediately.  The real start occurs
   1186 	 *	when the CPU_QUIESCED flag is turned off.
   1187 	 */
   1188 
   1189 	ASSERT(MUTEX_HELD(&cpu_lock));
   1190 
   1191 	/*
   1192 	 * Put all the cpus into a known safe place.
   1193 	 * No mutexes can be entered while CPUs are paused.
   1194 	 */
   1195 	error = mp_cpu_start(cp);	/* arch-dep hook */
   1196 	if (error == 0) {
   1197 		pg_cpupart_in(cp, cp->cpu_part);
   1198 		pause_cpus(NULL);
   1199 		cpu_add_active_internal(cp);
   1200 		if (cp->cpu_flags & CPU_FAULTED) {
   1201 			cp->cpu_flags &= ~CPU_FAULTED;
   1202 			mp_cpu_faulted_exit(cp);
   1203 		}
   1204 		cp->cpu_flags &= ~(CPU_QUIESCED | CPU_OFFLINE | CPU_FROZEN |
   1205 		    CPU_SPARE);
   1206 		CPU_NEW_GENERATION(cp);
   1207 		start_cpus();
   1208 		cpu_stats_kstat_create(cp);
   1209 		cpu_create_intrstat(cp);
   1210 		lgrp_kstat_create(cp);
   1211 		cpu_state_change_notify(cp->cpu_id, CPU_ON);
   1212 		cpu_intr_enable(cp);	/* arch-dep hook */
   1213 		cpu_state_change_notify(cp->cpu_id, CPU_INTR_ON);
   1214 		cpu_set_state(cp);
   1215 		cyclic_online(cp);
   1216 		/*
   1217 		 * This has to be called only after cyclic_online(). This
   1218 		 * function uses cyclics.
   1219 		 */
   1220 		callout_cpu_online(cp);
   1221 		poke_cpu(cp->cpu_id);
   1222 	}
   1223 
   1224 	return (error);
   1225 }
   1226 
/*
 * Take the indicated CPU offline.
 *
 * cp is the CPU to offline.  If CPU_FORCED is set in flags, hard-bound
 * threads are unbound as well as soft-bound ones.  cpu_lock must be held.
 *
 * Returns 0 on success.  Returns EBUSY if the CPU may not be offlined
 * (see the checks below), if the calling thread is bound to it, if
 * threads remain bound to it after the retry loop, or if
 * cyclic_offline() fails.  Errors from cpu_unbind() and mp_cpu_stop()
 * are returned unchanged.  On failure, the interrupt, cyclic and
 * callout state changed so far is restored and CPU_ON/CPU_INTR_ON
 * notifications are sent.
 */
int
cpu_offline(cpu_t *cp, int flags)
{
	cpupart_t *pp;
	int	error = 0;
	cpu_t	*ncp;
	int	intr_enable;
	int	cyclic_off = 0;
	int	callout_off = 0;
	int	loop_count;
	int	no_quiesce = 0;
	int	(*bound_func)(struct cpu *, int);
	kthread_t *t;
	lpl_t	*cpu_lpl;
	proc_t	*p;
	int	lgrp_diff_lpl;
	boolean_t unbind_all_threads = (flags & CPU_FORCED) != 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * If we're going from faulted or spare to offline, just
	 * clear these flags and update CPU state.
	 */
	if (cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) {
		if (cp->cpu_flags & CPU_FAULTED) {
			cp->cpu_flags &= ~CPU_FAULTED;
			mp_cpu_faulted_exit(cp);
		}
		cp->cpu_flags &= ~CPU_SPARE;
		cpu_set_state(cp);
		return (0);
	}

	/*
	 * Handle off-line request.
	 */
	pp = cp->cpu_part;
	/*
	 * Don't offline the last online CPU in the system or the last
	 * CPU in its partition.  Also refuse when cpu_intr_count(cp) is
	 * below 2 (presumably the number of CPUs able to take
	 * interrupts -- confirm against cpu_intr_count()).
	 */
	if (ncpus_online <= 1 || pp->cp_ncpus <= 1 || cpu_intr_count(cp) < 2)
		return (EBUSY);
	/*
	 * Unbind all soft-bound threads bound to our CPU and hard bound threads
	 * if we were asked to.
	 */
	error = cpu_unbind(cp->cpu_id, unbind_all_threads);
	if (error != 0)
		return (error);
	/*
	 * We shouldn't be bound to this CPU ourselves.
	 */
	if (curthread->t_bound_cpu == cp)
		return (EBUSY);

	/*
	 * Tell interested parties that this CPU is going offline.
	 */
	CPU_NEW_GENERATION(cp);
	cpu_state_change_notify(cp->cpu_id, CPU_OFF);

	/*
	 * Tell the PG subsystem that the CPU is leaving the partition
	 */
	pg_cpupart_out(cp, pp);

	/*
	 * Take the CPU out of interrupt participation so we won't find
	 * bound kernel threads.  If the architecture cannot completely
	 * shut off interrupts on the CPU, don't quiesce it, but don't
	 * run anything but interrupt threads... this is indicated by
	 * the CPU_OFFLINE flag being on but the CPU_QUIESCED flag being
	 * off.
	 */
	intr_enable = cp->cpu_flags & CPU_ENABLE;
	if (intr_enable)
		no_quiesce = cpu_intr_disable(cp);

	/*
	 * Record that we are aiming to offline this cpu.  This acts as
	 * a barrier to further weak binding requests in thread_nomigrate
	 * and also causes cpu_choose, disp_lowpri_cpu and setfrontdq to
	 * lean away from this cpu.  Further strong bindings are already
	 * avoided since we hold cpu_lock.  Since threads that are set
	 * runnable around now and others coming off the target cpu are
	 * directed away from the target, existing strong and weak bindings
	 * (especially the latter) to the target cpu stand maximum chance of
	 * being able to unbind during the short delay loop below (if other
	 * unbound threads compete they may not see cpu in time to unbind
	 * even if they would do so immediately).
	 */
	cpu_inmotion = cp;
	membar_enter();

	/*
	 * Check for kernel threads (strong or weak) bound to that CPU.
	 * Strongly bound threads may not unbind, and we'll have to return
	 * EBUSY.  Weakly bound threads should always disappear - we've
	 * stopped more weak binding with cpu_inmotion and existing
	 * bindings will drain imminently (they may not block).  Nonetheless
	 * we will wait for a fixed period for all bound threads to disappear.
	 * Inactive interrupt threads are OK (they'll be in TS_FREE
	 * state).  If test finds some bound threads, wait a few ticks
	 * to give short-lived threads (such as interrupts) chance to
	 * complete.  Note that if no_quiesce is set, i.e. this cpu
	 * is required to service interrupts, then we take the route
	 * that permits interrupt threads to be active (or bypassed).
	 */
	bound_func = no_quiesce ? disp_bound_threads : disp_bound_anythreads;

	/*
	 * Re-entered via "goto again" when the check made with CPUs paused
	 * (below) finds the CPU still busy; loop_count restarts each time.
	 */
again:	for (loop_count = 0; (*bound_func)(cp, 0); loop_count++) {
		if (loop_count >= 5) {
			error = EBUSY;	/* some threads still bound */
			break;
		}

		/*
		 * If some threads were assigned, give them
		 * a chance to complete or move.
		 *
		 * This assumes that the clock_thread is not bound
		 * to any CPU, because the clock_thread is needed to
		 * do the delay(hz/100).
		 *
		 * Note: we still hold the cpu_lock while waiting for
		 * the next clock tick.  This is OK since it isn't
		 * needed for anything else except processor_bind(2),
		 * and system initialization.  If we drop the lock,
		 * we would risk another p_online disabling the last
		 * processor.
		 */
		delay(hz/100);
	}

	/*
	 * callout_off and cyclic_off make sure each subsystem is taken
	 * offline at most once, even if we pass through "again" repeatedly.
	 */
	if (error == 0 && callout_off == 0) {
		callout_cpu_offline(cp);
		callout_off = 1;
	}

	if (error == 0 && cyclic_off == 0) {
		if (!cyclic_offline(cp)) {
			/*
			 * We must have bound cyclics...
			 */
			error = EBUSY;
			goto out;
		}
		cyclic_off = 1;
	}

	/*
	 * Call mp_cpu_stop() to perform any special operations
	 * needed for this machine architecture to offline a CPU.
	 */
	if (error == 0)
		error = mp_cpu_stop(cp);	/* arch-dep hook */

	/*
	 * If that all worked, take the CPU offline and decrement
	 * ncpus_online.
	 */
	if (error == 0) {
		/*
		 * Put all the cpus into a known safe place.
		 * No mutexes can be entered while CPUs are paused.
		 */
		pause_cpus(cp);
		/*
		 * Repeat the operation, if necessary, to make sure that
		 * all outstanding low-level interrupts run to completion
		 * before we set the CPU_QUIESCED flag.  It's also possible
		 * that a thread has weak bound to the cpu despite our raising
		 * cpu_inmotion above since it may have loaded that
		 * value before the barrier became visible (this would have
		 * to be the thread that was on the target cpu at the time
		 * we raised the barrier).
		 */
		if ((!no_quiesce && cp->cpu_intr_actv != 0) ||
		    (*bound_func)(cp, 1)) {
			start_cpus();
			(void) mp_cpu_start(cp);
			goto again;
		}
		ncp = cp->cpu_next_part;
		cpu_lpl = cp->cpu_lpl;
		ASSERT(cpu_lpl != NULL);

		/*
		 * Remove the CPU from the list of active CPUs.
		 */
		cpu_remove_active(cp);

		/*
		 * Walk the active process list and look for threads
		 * whose home lgroup needs to be updated, or
		 * the last CPU they run on is the one being offlined now.
		 */

		ASSERT(curthread->t_cpu != cp);
		for (p = practive; p != NULL; p = p->p_next) {

			t = p->p_tlist;

			if (t == NULL)
				continue;

			lgrp_diff_lpl = 0;

			do {
				ASSERT(t->t_lpl != NULL);
				/*
				 * Taking last CPU in lpl offline
				 * Rehome thread if it is in this lpl
				 * Otherwise, update the count of how many
				 * threads are in this CPU's lgroup but have
				 * a different lpl.
				 */

				if (cpu_lpl->lpl_ncpu == 0) {
					if (t->t_lpl == cpu_lpl)
						lgrp_move_thread(t,
						    lgrp_choose(t,
						    t->t_cpupart), 0);
					else if (t->t_lpl->lpl_lgrpid ==
					    cpu_lpl->lpl_lgrpid)
						lgrp_diff_lpl++;
				}
				ASSERT(t->t_lpl->lpl_ncpu > 0);

				/*
				 * Update CPU last ran on if it was this CPU
				 */
				if (t->t_cpu == cp && t->t_bound_cpu != cp)
					t->t_cpu = disp_lowpri_cpu(ncp,
					    t->t_lpl, t->t_pri, NULL);
				ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp ||
				    t->t_weakbound_cpu == cp);

				t = t->t_forw;
			} while (t != p->p_tlist);

			/*
			 * Didn't find any threads in the same lgroup as this
			 * CPU with a different lpl, so remove the lgroup from
			 * the process lgroup bitmask.
			 */

			if (lgrp_diff_lpl == 0)
				klgrpset_del(p->p_lgrpset, cpu_lpl->lpl_lgrpid);
		}

		/*
		 * Walk thread list looking for threads that need to be
		 * rehomed, since there are some threads that are not in
		 * their process's p_tlist.
		 */

		t = curthread;
		do {
			ASSERT(t != NULL && t->t_lpl != NULL);

			/*
			 * Rehome threads with same lpl as this CPU when this
			 * is the last CPU in the lpl.
			 */

			if ((cpu_lpl->lpl_ncpu == 0) && (t->t_lpl == cpu_lpl))
				lgrp_move_thread(t,
				    lgrp_choose(t, t->t_cpupart), 1);

			ASSERT(t->t_lpl->lpl_ncpu > 0);

			/*
			 * Update CPU last ran on if it was this CPU
			 */

			if (t->t_cpu == cp && t->t_bound_cpu != cp) {
				t->t_cpu = disp_lowpri_cpu(ncp,
				    t->t_lpl, t->t_pri, NULL);
			}
			ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp ||
			    t->t_weakbound_cpu == cp);
			t = t->t_next;

		} while (t != curthread);
		ASSERT((cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) == 0);
		cp->cpu_flags |= CPU_OFFLINE;
		disp_cpu_inactive(cp);
		if (!no_quiesce)
			cp->cpu_flags |= CPU_QUIESCED;
		ncpus_online--;
		cpu_set_state(cp);
		cpu_inmotion = NULL;
		start_cpus();
		/* CPUs are running again; the per-CPU kstats can now go. */
		cpu_stats_kstat_destroy(cp);
		cpu_delete_intrstat(cp);
		lgrp_kstat_destroy(cp);
	}

	/* Common exit for success and failure once the offline was begun. */
out:
	cpu_inmotion = NULL;

	/*
	 * If we failed, re-enable interrupts.
	 * Do this even if cpu_intr_disable returned an error, because
	 * it may have partially disabled interrupts.
	 */
	if (error && intr_enable)
		cpu_intr_enable(cp);

	/*
	 * If we failed, but managed to offline the cyclic subsystem on this
	 * CPU, bring it back online.
	 */
	if (error && cyclic_off)
		cyclic_online(cp);

	/*
	 * If we failed, but managed to offline callouts on this CPU,
	 * bring it back online.
	 */
	if (error && callout_off)
		callout_cpu_online(cp);

	/*
	 * Tell the PG subsystem that the CPU is back in the partition.
	 * NOTE(review): this call is made whether or not we failed,
	 * although it was originally commented as a failure-only step;
	 * confirm that the unconditional call is intended.
	 */
	pg_cpupart_in(cp, pp);

	/*
	 * If we failed, we need to notify everyone that this CPU is back on.
	 */
	if (error != 0) {
		CPU_NEW_GENERATION(cp);
		cpu_state_change_notify(cp->cpu_id, CPU_ON);
		cpu_state_change_notify(cp->cpu_id, CPU_INTR_ON);
	}

	return (error);
}
   1571 
   1572 /*
   1573  * Mark the indicated CPU as faulted, taking it offline.
   1574  */
   1575 int
   1576 cpu_faulted(cpu_t *cp, int flags)
   1577 {
   1578 	int	error = 0;
   1579 
   1580 	ASSERT(MUTEX_HELD(&cpu_lock));
   1581 	ASSERT(!cpu_is_poweredoff(cp));
   1582 
   1583 	if (cpu_is_offline(cp)) {
   1584 		cp->cpu_flags &= ~CPU_SPARE;
   1585 		cp->cpu_flags |= CPU_FAULTED;
   1586 		mp_cpu_faulted_enter(cp);
   1587 		cpu_set_state(cp);
   1588 		return (0);
   1589 	}
   1590 
   1591 	if ((error = cpu_offline(cp, flags)) == 0) {
   1592 		cp->cpu_flags |= CPU_FAULTED;
   1593 		mp_cpu_faulted_enter(cp);
   1594 		cpu_set_state(cp);
   1595 	}
   1596 
   1597 	return (error);
   1598 }
   1599 
   1600 /*
   1601  * Mark the indicated CPU as a spare, taking it offline.
   1602  */
   1603 int
   1604 cpu_spare(cpu_t *cp, int flags)
   1605 {
   1606 	int	error = 0;
   1607 
   1608 	ASSERT(MUTEX_HELD(&cpu_lock));
   1609 	ASSERT(!cpu_is_poweredoff(cp));
   1610 
   1611 	if (cpu_is_offline(cp)) {
   1612 		if (cp->cpu_flags & CPU_FAULTED) {
   1613 			cp->cpu_flags &= ~CPU_FAULTED;
   1614 			mp_cpu_faulted_exit(cp);
   1615 		}
   1616 		cp->cpu_flags |= CPU_SPARE;
   1617 		cpu_set_state(cp);
   1618 		return (0);
   1619 	}
   1620 
   1621 	if ((error = cpu_offline(cp, flags)) == 0) {
   1622 		cp->cpu_flags |= CPU_SPARE;
   1623 		cpu_set_state(cp);
   1624 	}
   1625 
   1626 	return (error);
   1627 }
   1628 
   1629 /*
   1630  * Take the indicated CPU from poweroff to offline.
   1631  */
   1632 int
   1633 cpu_poweron(cpu_t *cp)
   1634 {
   1635 	int	error = ENOTSUP;
   1636 
   1637 	ASSERT(MUTEX_HELD(&cpu_lock));
   1638 	ASSERT(cpu_is_poweredoff(cp));
   1639 
   1640 	error = mp_cpu_poweron(cp);	/* arch-dep hook */
   1641 	if (error == 0)
   1642 		cpu_set_state(cp);
   1643 
   1644 	return (error);
   1645 }
   1646 
   1647 /*
   1648  * Take the indicated CPU from any inactive state to powered off.
   1649  */
   1650 int
   1651 cpu_poweroff(cpu_t *cp)
   1652 {
   1653 	int	error = ENOTSUP;
   1654 
   1655 	ASSERT(MUTEX_HELD(&cpu_lock));
   1656 	ASSERT(cpu_is_offline(cp));
   1657 
   1658 	if (!(cp->cpu_flags & CPU_QUIESCED))
   1659 		return (EBUSY);		/* not completely idle */
   1660 
   1661 	error = mp_cpu_poweroff(cp);	/* arch-dep hook */
   1662 	if (error == 0)
   1663 		cpu_set_state(cp);
   1664 
   1665 	return (error);
   1666 }
   1667 
   1668 /*
   1669  * Initialize the Sequential CPU id lookup table
   1670  */
   1671 void
   1672 cpu_seq_tbl_init()
   1673 {
   1674 	cpu_t	**tbl;
   1675 
   1676 	tbl = kmem_zalloc(sizeof (struct cpu *) * max_ncpus, KM_SLEEP);
   1677 	tbl[0] = CPU;
   1678 
   1679 	cpu_seq = tbl;
   1680 }
   1681 
/*
 * Initialize the CPU lists for the first CPU.
 *
 * cp becomes the sole member of the cpu_list, the online (active) list
 * and the default partition's list, each linked circularly to itself.
 * It is given sequence id 0 and added to cpu_available.
 */
void
cpu_list_init(cpu_t *cp)
{
	/* List of all CPUs. */
	cp->cpu_next = cp;
	cp->cpu_prev = cp;
	cpu_list = cp;
	clock_cpu_list = cp;

	/* List of online CPUs. */
	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;
	cpu_active = cp;

	cp->cpu_seqid = 0;
	CPUSET_ADD(cpu_seqid_inuse, 0);

	/*
	 * Bootstrap cpu_seq using cpu_list
	 * The cpu_seq[] table will be dynamically allocated
	 * when kmem later becomes available (but before going MP)
	 */
	cpu_seq = &cpu_list;

	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
	/* The default partition starts out holding just this CPU. */
	cp_default.cp_cpulist = cp;
	cp_default.cp_ncpus = 1;
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;
	cp->cpu_part = &cp_default;

	CPUSET_ADD(cpu_available, cp->cpu_id);
}
   1716 
/*
 * Insert a CPU into the list of available CPUs.
 *
 * The caller must hold cpu_lock, and cpu_list must already have been
 * started by cpu_list_init().  The CPU is linked at the tail of
 * cpu_list, given the lowest free sequence id, entered in cpu[],
 * cpu_seq[] and cpu_available, and given a pause thread.
 */
void
cpu_add_unit(cpu_t *cp)
{
	int seqid;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_list != NULL);	/* list started in cpu_list_init */

	lgrp_config(LGRP_CONFIG_CPU_ADD, (uintptr_t)cp, 0);

	/*
	 * Note: most users of the cpu_list will grab the
	 * cpu_lock to ensure that it isn't modified.  However,
	 * certain users can't or won't do that.  To allow this
	 * we pause the other cpus.  Users who walk the list
	 * without cpu_lock, must disable kernel preemption
	 * to ensure that the list isn't modified underneath
	 * them.  Also, any cached pointers to cpu structures
	 * must be revalidated by checking to see if the
	 * cpu_next pointer points to itself.  This check must
	 * be done with the cpu_lock held or kernel preemption
	 * disabled.  This check relies upon the fact that
	 * old cpu structures are not freed or cleared after
	 * they are removed from the cpu_list.
	 *
	 * Note that the clock code walks the cpu list dereferencing
	 * the cpu_part pointer, so we need to initialize it before
	 * adding the cpu to the list.
	 */
	cp->cpu_part = &cp_default;
	(void) pause_cpus(NULL);
	/* Link cp in just before cpu_list, i.e. at the tail of the ring. */
	cp->cpu_next = cpu_list;
	cp->cpu_prev = cpu_list->cpu_prev;
	cpu_list->cpu_prev->cpu_next = cp;
	cpu_list->cpu_prev = cp;
	start_cpus();

	/* Claim the lowest sequence id that is not in use. */
	for (seqid = 0; CPU_IN_SET(cpu_seqid_inuse, seqid); seqid++)
		continue;
	CPUSET_ADD(cpu_seqid_inuse, seqid);
	cp->cpu_seqid = seqid;
	ASSERT(ncpus < max_ncpus);
	ncpus++;
	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
	cpu[cp->cpu_id] = cp;
	CPUSET_ADD(cpu_available, cp->cpu_id);
	cpu_seq[cp->cpu_seqid] = cp;

	/*
	 * allocate a pause thread for this CPU.
	 */
	cpu_pause_alloc(cp);

	/*
	 * So that new CPUs won't have NULL prev_onln and next_onln pointers,
	 * link them into a list of just that CPU.
	 * This is so that disp_lowpri_cpu will work for thread_create in
	 * pause_cpus() when called from the startup thread in a new CPU.
	 */
	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;
	cpu_info_kstat_create(cp);
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;

	init_cpu_mstate(cp, CMS_SYSTEM);

	pool_pset_mod = gethrtime();
}
   1789 
   1790 /*
   1791  * Do the opposite of cpu_add_unit().
   1792  */
   1793 void
   1794 cpu_del_unit(int cpuid)
   1795 {
   1796 	struct cpu	*cp, *cpnext;
   1797 
   1798 	ASSERT(MUTEX_HELD(&cpu_lock));
   1799 	cp = cpu[cpuid];
   1800 	ASSERT(cp != NULL);
   1801 
   1802 	ASSERT(cp->cpu_next_onln == cp);
   1803 	ASSERT(cp->cpu_prev_onln == cp);
   1804 	ASSERT(cp->cpu_next_part == cp);
   1805 	ASSERT(cp->cpu_prev_part == cp);
   1806 
   1807 	/*
   1808 	 * Tear down the CPU's physical ID cache, and update any
   1809 	 * processor groups
   1810 	 */
   1811 	pg_cpu_fini(cp, NULL);
   1812 	pghw_physid_destroy(cp);
   1813 
   1814 	/*
   1815 	 * Destroy kstat stuff.
   1816 	 */
   1817 	cpu_info_kstat_destroy(cp);
   1818 	term_cpu_mstate(cp);
   1819 	/*
   1820 	 * Free up pause thread.
   1821 	 */
   1822 	cpu_pause_free(cp);
   1823 	CPUSET_DEL(cpu_available, cp->cpu_id);
   1824 	cpu[cp->cpu_id] = NULL;
   1825 	cpu_seq[cp->cpu_seqid] = NULL;
   1826 
   1827 	/*
   1828 	 * The clock thread and mutex_vector_enter cannot hold the
   1829 	 * cpu_lock while traversing the cpu list, therefore we pause
   1830 	 * all other threads by pausing the other cpus. These, and any
   1831 	 * other routines holding cpu pointers while possibly sleeping
   1832 	 * must be sure to call kpreempt_disable before processing the
   1833 	 * list and be sure to check that the cpu has not been deleted
   1834 	 * after any sleeps (check cp->cpu_next != NULL). We guarantee
   1835 	 * to keep the deleted cpu structure around.
   1836 	 *
   1837 	 * Note that this MUST be done AFTER cpu_available
   1838 	 * has been updated so that we don't waste time
   1839 	 * trying to pause the cpu we're trying to delete.
   1840 	 */
   1841 	(void) pause_cpus(NULL);
   1842 
   1843 	cpnext = cp->cpu_next;
   1844 	cp->cpu_prev->cpu_next = cp->cpu_next;
   1845 	cp->cpu_next->cpu_prev = cp->cpu_prev;
   1846 	if (cp == cpu_list)
   1847 		cpu_list = cpnext;
   1848 
   1849 	/*
   1850 	 * Signals that the cpu has been deleted (see above).
   1851 	 */
   1852 	cp->cpu_next = NULL;
   1853 	cp->cpu_prev = NULL;
   1854 
   1855 	start_cpus();
   1856 
   1857 	CPUSET_DEL(cpu_seqid_inuse, cp->cpu_seqid);
   1858 	ncpus--;
   1859 	lgrp_config(LGRP_CONFIG_CPU_DEL, (uintptr_t)cp, 0);
   1860 
   1861 	pool_pset_mod = gethrtime();
   1862 }
   1863 
   1864 /*
   1865  * Add a CPU to the list of active CPUs.
   1866  *	This routine must not get any locks, because other CPUs are paused.
   1867  */
   1868 static void
   1869 cpu_add_active_internal(cpu_t *cp)
   1870 {
   1871 	cpupart_t	*pp = cp->cpu_part;
   1872 
   1873 	ASSERT(MUTEX_HELD(&cpu_lock));
   1874 	ASSERT(cpu_list != NULL);	/* list started in cpu_list_init */
   1875 
   1876 	ncpus_online++;
   1877 	cpu_set_state(cp);
   1878 	cp->cpu_next_onln = cpu_active;
   1879 	cp->cpu_prev_onln = cpu_active->cpu_prev_onln;
   1880 	cpu_active->cpu_prev_onln->cpu_next_onln = cp;
   1881 	cpu_active->cpu_prev_onln = cp;
   1882 
   1883 	if (pp->cp_cpulist) {
   1884 		cp->cpu_next_part = pp->cp_cpulist;
   1885 		cp->cpu_prev_part = pp->cp_cpulist->cpu_prev_part;
   1886 		pp->cp_cpulist->cpu_prev_part->cpu_next_part = cp;
   1887 		pp->cp_cpulist->cpu_prev_part = cp;
   1888 	} else {
   1889 		ASSERT(pp->cp_ncpus == 0);
   1890 		pp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
   1891 	}
   1892 	pp->cp_ncpus++;
   1893 	if (pp->cp_ncpus == 1) {
   1894 		cp_numparts_nonempty++;
   1895 		ASSERT(cp_numparts_nonempty != 0);
   1896 	}
   1897 
   1898 	pg_cpu_active(cp);
   1899 	lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)cp, 0);
   1900 
   1901 	bzero(&cp->cpu_loadavg, sizeof (cp->cpu_loadavg));
   1902 }
   1903 
   1904 /*
   1905  * Add a CPU to the list of active CPUs.
   1906  *	This is called from machine-dependent layers when a new CPU is started.
   1907  */
   1908 void
   1909 cpu_add_active(cpu_t *cp)
   1910 {
   1911 	pg_cpupart_in(cp, cp->cpu_part);
   1912 
   1913 	pause_cpus(NULL);
   1914 	cpu_add_active_internal(cp);
   1915 	start_cpus();
   1916 
   1917 	cpu_stats_kstat_create(cp);
   1918 	cpu_create_intrstat(cp);
   1919 	lgrp_kstat_create(cp);
   1920 	cpu_state_change_notify(cp->cpu_id, CPU_INIT);
   1921 }
   1922 
   1923 
   1924 /*
   1925  * Remove a CPU from the list of active CPUs.
   1926  *	This routine must not get any locks, because other CPUs are paused.
   1927  */
   1928 /* ARGSUSED */
   1929 static void
   1930 cpu_remove_active(cpu_t *cp)
   1931 {
   1932 	cpupart_t	*pp = cp->cpu_part;
   1933 
   1934 	ASSERT(MUTEX_HELD(&cpu_lock));
   1935 	ASSERT(cp->cpu_next_onln != cp);	/* not the last one */
   1936 	ASSERT(cp->cpu_prev_onln != cp);	/* not the last one */
   1937 
   1938 	pg_cpu_inactive(cp);
   1939 
   1940 	lgrp_config(LGRP_CONFIG_CPU_OFFLINE, (uintptr_t)cp, 0);
   1941 
   1942 	if (cp == clock_cpu_list)
   1943 		clock_cpu_list = cp->cpu_next_onln;
   1944 
   1945 	cp->cpu_prev_onln->cpu_next_onln = cp->cpu_next_onln;
   1946 	cp->cpu_next_onln->cpu_prev_onln = cp->cpu_prev_onln;
   1947 	if (cpu_active == cp) {
   1948 		cpu_active = cp->cpu_next_onln;
   1949 	}
   1950 	cp->cpu_next_onln = cp;
   1951 	cp->cpu_prev_onln = cp;
   1952 
   1953 	cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
   1954 	cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
   1955 	if (pp->cp_cpulist == cp) {
   1956 		pp->cp_cpulist = cp->cpu_next_part;
   1957 		ASSERT(pp->cp_cpulist != cp);
   1958 	}
   1959 	cp->cpu_next_part = cp;
   1960 	cp->cpu_prev_part = cp;
   1961 	pp->cp_ncpus--;
   1962 	if (pp->cp_ncpus == 0) {
   1963 		cp_numparts_nonempty--;
   1964 		ASSERT(cp_numparts_nonempty != 0);
   1965 	}
   1966 }
   1967 
   1968 /*
   1969  * Routine used to setup a newly inserted CPU in preparation for starting
   1970  * it running code.
   1971  */
   1972 int
   1973 cpu_configure(int cpuid)
   1974 {
   1975 	int retval = 0;
   1976 
   1977 	ASSERT(MUTEX_HELD(&cpu_lock));
   1978 
   1979 	/*
   1980 	 * Some structures are statically allocated based upon
   1981 	 * the maximum number of cpus the system supports.  Do not
   1982 	 * try to add anything beyond this limit.
   1983 	 */
   1984 	if (cpuid < 0 || cpuid >= NCPU) {
   1985 		return (EINVAL);
   1986 	}
   1987 
   1988 	if ((cpu[cpuid] != NULL) && (cpu[cpuid]->cpu_flags != 0)) {
   1989 		return (EALREADY);
   1990 	}
   1991 
   1992 	if ((retval = mp_cpu_configure(cpuid)) != 0) {
   1993 		return (retval);
   1994 	}
   1995 
   1996 	cpu[cpuid]->cpu_flags = CPU_QUIESCED | CPU_OFFLINE | CPU_POWEROFF;
   1997 	cpu_set_state(cpu[cpuid]);
   1998 	retval = cpu_state_change_hooks(cpuid, CPU_CONFIG, CPU_UNCONFIG);
   1999 	if (retval != 0)
   2000 		(void) mp_cpu_unconfigure(cpuid);
   2001 
   2002 	return (retval);
   2003 }
   2004 
   2005 /*
   2006  * Routine used to cleanup a CPU that has been powered off.  This will
   2007  * destroy all per-cpu information related to this cpu.
   2008  */
   2009 int
   2010 cpu_unconfigure(int cpuid)
   2011 {
   2012 	int error;
   2013 
   2014 	ASSERT(MUTEX_HELD(&cpu_lock));
   2015 
   2016 	if (cpu[cpuid] == NULL) {
   2017 		return (ENODEV);
   2018 	}
   2019 
   2020 	if (cpu[cpuid]->cpu_flags == 0) {
   2021 		return (EALREADY);
   2022 	}
   2023 
   2024 	if ((cpu[cpuid]->cpu_flags & CPU_POWEROFF) == 0) {
   2025 		return (EBUSY);
   2026 	}
   2027 
   2028 	if (cpu[cpuid]->cpu_props != NULL) {
   2029 		(void) nvlist_free(cpu[cpuid]->cpu_props);
   2030 		cpu[cpuid]->cpu_props = NULL;
   2031 	}
   2032 
   2033 	error = cpu_state_change_hooks(cpuid, CPU_UNCONFIG, CPU_CONFIG);
   2034 
   2035 	if (error != 0)
   2036 		return (error);
   2037 
   2038 	return (mp_cpu_unconfigure(cpuid));
   2039 }
   2040 
   2041 /*
   2042  * Routines for registering and de-registering cpu_setup callback functions.
   2043  *
   2044  * Caller's context
   2045  *	These routines must not be called from a driver's attach(9E) or
   2046  *	detach(9E) entry point.
   2047  *
   2048  * NOTE: CPU callbacks should not block. They are called with cpu_lock held.
   2049  */
   2050 
   2051 /*
   2052  * Ideally, these would be dynamically allocated and put into a linked
   2053  * list; however that is not feasible because the registration routine
   2054  * has to be available before the kmem allocator is working (in fact,
   2055  * it is called by the kmem allocator init code).  In any case, there
   2056  * are quite a few extra entries for future users.
   2057  */
   2058 #define	NCPU_SETUPS	20
   2059 
   2060 struct cpu_setup {
   2061 	cpu_setup_func_t *func;
   2062 	void *arg;
   2063 } cpu_setups[NCPU_SETUPS];
   2064 
   2065 void
   2066 register_cpu_setup_func(cpu_setup_func_t *func, void *arg)
   2067 {
   2068 	int i;
   2069 
   2070 	ASSERT(MUTEX_HELD(&cpu_lock));
   2071 
   2072 	for (i = 0; i < NCPU_SETUPS; i++)
   2073 		if (cpu_setups[i].func == NULL)
   2074 			break;
   2075 	if (i >= NCPU_SETUPS)
   2076 		cmn_err(CE_PANIC, "Ran out of cpu_setup callback entries");
   2077 
   2078 	cpu_setups[i].func = func;
   2079 	cpu_setups[i].arg = arg;
   2080 }
   2081 
   2082 void
   2083 unregister_cpu_setup_func(cpu_setup_func_t *func, void *arg)
   2084 {
   2085 	int i;
   2086 
   2087 	ASSERT(MUTEX_HELD(&cpu_lock));
   2088 
   2089 	for (i = 0; i < NCPU_SETUPS; i++)
   2090 		if ((cpu_setups[i].func == func) &&
   2091 		    (cpu_setups[i].arg == arg))
   2092 			break;
   2093 	if (i >= NCPU_SETUPS)
   2094 		cmn_err(CE_PANIC, "Could not find cpu_setup callback to "
   2095 		    "deregister");
   2096 
   2097 	cpu_setups[i].func = NULL;
   2098 	cpu_setups[i].arg = 0;
   2099 }
   2100 
   2101 /*
   2102  * Call any state change hooks for this CPU, ignore any errors.
   2103  */
   2104 void
   2105 cpu_state_change_notify(int id, cpu_setup_t what)
   2106 {
   2107 	int i;
   2108 
   2109 	ASSERT(MUTEX_HELD(&cpu_lock));
   2110 
   2111 	for (i = 0; i < NCPU_SETUPS; i++) {
   2112 		if (cpu_setups[i].func != NULL) {
   2113 			cpu_setups[i].func(what, id, cpu_setups[i].arg);
   2114 		}
   2115 	}
   2116 }
   2117 
   2118 /*
   2119  * Call any state change hooks for this CPU, undo it if error found.
   2120  */
   2121 static int
   2122 cpu_state_change_hooks(int id, cpu_setup_t what, cpu_setup_t undo)
   2123 {
   2124 	int i;
   2125 	int retval = 0;
   2126 
   2127 	ASSERT(MUTEX_HELD(&cpu_lock));
   2128 
   2129 	for (i = 0; i < NCPU_SETUPS; i++) {
   2130 		if (cpu_setups[i].func != NULL) {
   2131 			retval = cpu_setups[i].func(what, id,
   2132 			    cpu_setups[i].arg);
   2133 			if (retval) {
   2134 				for (i--; i >= 0; i--) {
   2135 					if (cpu_setups[i].func != NULL)
   2136 						cpu_setups[i].func(undo,
   2137 						    id, cpu_setups[i].arg);
   2138 				}
   2139 				break;
   2140 			}
   2141 		}
   2142 	}
   2143 	return (retval);
   2144 }
   2145 
   2146 /*
   2147  * Export information about this CPU via the kstat mechanism.
   2148  */
   2149 static struct {
   2150 	kstat_named_t ci_state;
   2151 	kstat_named_t ci_state_begin;
   2152 	kstat_named_t ci_cpu_type;
   2153 	kstat_named_t ci_fpu_type;
   2154 	kstat_named_t ci_clock_MHz;
   2155 	kstat_named_t ci_chip_id;
   2156 	kstat_named_t ci_implementation;
   2157 	kstat_named_t ci_brandstr;
   2158 	kstat_named_t ci_core_id;
   2159 	kstat_named_t ci_curr_clock_Hz;
   2160 	kstat_named_t ci_supp_freq_Hz;
   2161 	kstat_named_t ci_pg_id;
   2162 #if defined(__sparcv9)
   2163 	kstat_named_t ci_device_ID;
   2164 	kstat_named_t ci_cpu_fru;
   2165 #endif
   2166 #if defined(__x86)
   2167 	kstat_named_t ci_vendorstr;
   2168 	kstat_named_t ci_family;
   2169 	kstat_named_t ci_model;
   2170 	kstat_named_t ci_step;
   2171 	kstat_named_t ci_clogid;
   2172 	kstat_named_t ci_pkg_core_id;
   2173 	kstat_named_t ci_ncpuperchip;
   2174 	kstat_named_t ci_ncoreperchip;
   2175 	kstat_named_t ci_max_cstates;
   2176 	kstat_named_t ci_curr_cstate;
   2177 	kstat_named_t ci_cacheid;
   2178 	kstat_named_t ci_sktstr;
   2179 #endif
   2180 } cpu_info_template = {
   2181 	{ "state",			KSTAT_DATA_CHAR },
   2182 	{ "state_begin",		KSTAT_DATA_LONG },
   2183 	{ "cpu_type",			KSTAT_DATA_CHAR },
   2184 	{ "fpu_type",			KSTAT_DATA_CHAR },
   2185 	{ "clock_MHz",			KSTAT_DATA_LONG },
   2186 	{ "chip_id",			KSTAT_DATA_LONG },
   2187 	{ "implementation",		KSTAT_DATA_STRING },
   2188 	{ "brand",			KSTAT_DATA_STRING },
   2189 	{ "core_id",			KSTAT_DATA_LONG },
   2190 	{ "current_clock_Hz",		KSTAT_DATA_UINT64 },
   2191 	{ "supported_frequencies_Hz",	KSTAT_DATA_STRING },
   2192 	{ "pg_id",			KSTAT_DATA_LONG },
   2193 #if defined(__sparcv9)
   2194 	{ "device_ID",			KSTAT_DATA_UINT64 },
   2195 	{ "cpu_fru",			KSTAT_DATA_STRING },
   2196 #endif
   2197 #if defined(__x86)
   2198 	{ "vendor_id",			KSTAT_DATA_STRING },
   2199 	{ "family",			KSTAT_DATA_INT32 },
   2200 	{ "model",			KSTAT_DATA_INT32 },
   2201 	{ "stepping",			KSTAT_DATA_INT32 },
   2202 	{ "clog_id",			KSTAT_DATA_INT32 },
   2203 	{ "pkg_core_id",		KSTAT_DATA_LONG },
   2204 	{ "ncpu_per_chip",		KSTAT_DATA_INT32 },
   2205 	{ "ncore_per_chip",		KSTAT_DATA_INT32 },
   2206 	{ "supported_max_cstates",	KSTAT_DATA_INT32 },
   2207 	{ "current_cstate",		KSTAT_DATA_INT32 },
   2208 	{ "cache_id",			KSTAT_DATA_INT32 },
   2209 	{ "socket_type",		KSTAT_DATA_STRING },
   2210 #endif
   2211 };
   2212 
   2213 static kmutex_t cpu_info_template_lock;
   2214 
   2215 static int
   2216 cpu_info_kstat_update(kstat_t *ksp, int rw)
   2217 {
   2218 	cpu_t	*cp = ksp->ks_private;
   2219 	const char *pi_state;
   2220 
   2221 	if (rw == KSTAT_WRITE)
   2222 		return (EACCES);
   2223 
   2224 #if defined(__x86)
   2225 	/* Is the cpu still initialising itself? */
   2226 	if (cpuid_checkpass(cp, 1) == 0)
   2227 		return (ENXIO);
   2228 #endif
   2229 	switch (cp->cpu_type_info.pi_state) {
   2230 	case P_ONLINE:
   2231 		pi_state = PS_ONLINE;
   2232 		break;
   2233 	case P_POWEROFF:
   2234 		pi_state = PS_POWEROFF;
   2235 		break;
   2236 	case P_NOINTR:
   2237 		pi_state = PS_NOINTR;
   2238 		break;
   2239 	case P_FAULTED:
   2240 		pi_state = PS_FAULTED;
   2241 		break;
   2242 	case P_SPARE:
   2243 		pi_state = PS_SPARE;
   2244 		break;
   2245 	case P_OFFLINE:
   2246 		pi_state = PS_OFFLINE;
   2247 		break;
   2248 	default:
   2249 		pi_state = "unknown";
   2250 	}
   2251 	(void) strcpy(cpu_info_template.ci_state.value.c, pi_state);
   2252 	cpu_info_template.ci_state_begin.value.l = cp->cpu_state_begin;
   2253 	(void) strncpy(cpu_info_template.ci_cpu_type.value.c,
   2254 	    cp->cpu_type_info.pi_processor_type, 15);
   2255 	(void) strncpy(cpu_info_template.ci_fpu_type.value.c,
   2256 	    cp->cpu_type_info.pi_fputypes, 15);
   2257 	cpu_info_template.ci_clock_MHz.value.l = cp->cpu_type_info.pi_clock;
   2258 	cpu_info_template.ci_chip_id.value.l =
   2259 	    pg_plat_hw_instance_id(cp, PGHW_CHIP);
   2260 	kstat_named_setstr(&cpu_info_template.ci_implementation,
   2261 	    cp->cpu_idstr);
   2262 	kstat_named_setstr(&cpu_info_template.ci_brandstr, cp->cpu_brandstr);
   2263 	cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp);
   2264 	cpu_info_template.ci_curr_clock_Hz.value.ui64 =
   2265 	    cp->cpu_curr_clock;
   2266 	cpu_info_template.ci_pg_id.value.l =
   2267 	    cp->cpu_pg && cp->cpu_pg->cmt_lineage ?
   2268 	    cp->cpu_pg->cmt_lineage->pg_id : -1;
   2269 	kstat_named_setstr(&cpu_info_template.ci_supp_freq_Hz,
   2270 	    cp->cpu_supp_freqs);
   2271 #if defined(__sparcv9)
   2272 	cpu_info_template.ci_device_ID.value.ui64 =
   2273 	    cpunodes[cp->cpu_id].device_id;
   2274 	kstat_named_setstr(&cpu_info_template.ci_cpu_fru, cpu_fru_fmri(cp));
   2275 #endif
   2276 #if defined(__x86)
   2277 	kstat_named_setstr(&cpu_info_template.ci_vendorstr,
   2278 	    cpuid_getvendorstr(cp));
   2279 	cpu_info_template.ci_family.value.l = cpuid_getfamily(cp);
   2280 	cpu_info_template.ci_model.value.l = cpuid_getmodel(cp);
   2281 	cpu_info_template.ci_step.value.l = cpuid_getstep(cp);
   2282 	cpu_info_template.ci_clogid.value.l = cpuid_get_clogid(cp);
   2283 	cpu_info_template.ci_ncpuperchip.value.l = cpuid_get_ncpu_per_chip(cp);
   2284 	cpu_info_template.ci_ncoreperchip.value.l =
   2285 	    cpuid_get_ncore_per_chip(cp);
   2286 	cpu_info_template.ci_pkg_core_id.value.l = cpuid_get_pkgcoreid(cp);
   2287 	cpu_info_template.ci_max_cstates.value.l = cp->cpu_m.max_cstates;
   2288 	cpu_info_template.ci_curr_cstate.value.l = cpu_idle_get_cpu_state(cp);
   2289 	cpu_info_template.ci_cacheid.value.i32 = cpuid_get_cacheid(cp);
   2290 	kstat_named_setstr(&cpu_info_template.ci_sktstr,
   2291 	    cpuid_getsocketstr(cp));
   2292 #endif
   2293 
   2294 	return (0);
   2295 }
   2296 
   2297 static void
   2298 cpu_info_kstat_create(cpu_t *cp)
   2299 {
   2300 	zoneid_t zoneid;
   2301 
   2302 	ASSERT(MUTEX_HELD(&cpu_lock));
   2303 
   2304 	if (pool_pset_enabled())
   2305 		zoneid = GLOBAL_ZONEID;
   2306 	else
   2307 		zoneid = ALL_ZONES;
   2308 	if ((cp->cpu_info_kstat = kstat_create_zone("cpu_info", cp->cpu_id,
   2309 	    NULL, "misc", KSTAT_TYPE_NAMED,
   2310 	    sizeof (cpu_info_template) / sizeof (kstat_named_t),
   2311 	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE, zoneid)) != NULL) {
   2312 		cp->cpu_info_kstat->ks_data_size += 2 * CPU_IDSTRLEN;
   2313 #if defined(__sparcv9)
   2314 		cp->cpu_info_kstat->ks_data_size +=
   2315 		    strlen(cpu_fru_fmri(cp)) + 1;
   2316 #endif
   2317 #if defined(__x86)
   2318 		cp->cpu_info_kstat->ks_data_size += X86_VENDOR_STRLEN;
   2319 #endif
   2320 		if (cp->cpu_supp_freqs != NULL)
   2321 			cp->cpu_info_kstat->ks_data_size +=
   2322 			    strlen(cp->cpu_supp_freqs) + 1;
   2323 		cp->cpu_info_kstat->ks_lock = &cpu_info_template_lock;
   2324 		cp->cpu_info_kstat->ks_data = &cpu_info_template;
   2325 		cp->cpu_info_kstat->ks_private = cp;
   2326 		cp->cpu_info_kstat->ks_update = cpu_info_kstat_update;
   2327 		kstat_install(cp->cpu_info_kstat);
   2328 	}
   2329 }
   2330 
   2331 static void
   2332 cpu_info_kstat_destroy(cpu_t *cp)
   2333 {
   2334 	ASSERT(MUTEX_HELD(&cpu_lock));
   2335 
   2336 	kstat_delete(cp->cpu_info_kstat);
   2337 	cp->cpu_info_kstat = NULL;
   2338 }
   2339 
   2340 /*
   2341  * Create and install kstats for the boot CPU.
   2342  */
   2343 void
   2344 cpu_kstat_init(cpu_t *cp)
   2345 {
   2346 	mutex_enter(&cpu_lock);
   2347 	cpu_info_kstat_create(cp);
   2348 	cpu_stats_kstat_create(cp);
   2349 	cpu_create_intrstat(cp);
   2350 	cpu_set_state(cp);
   2351 	mutex_exit(&cpu_lock);
   2352 }
   2353 
   2354 /*
   2355  * Make visible to the zone that subset of the cpu information that would be
   2356  * initialized when a cpu is configured (but still offline).
   2357  */
   2358 void
   2359 cpu_visibility_configure(cpu_t *cp, zone_t *zone)
   2360 {
   2361 	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
   2362 
   2363 	ASSERT(MUTEX_HELD(&cpu_lock));
   2364 	ASSERT(pool_pset_enabled());
   2365 	ASSERT(cp != NULL);
   2366 
   2367 	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
   2368 		zone->zone_ncpus++;
   2369 		ASSERT(zone->zone_ncpus <= ncpus);
   2370 	}
   2371 	if (cp->cpu_info_kstat != NULL)
   2372 		kstat_zone_add(cp->cpu_info_kstat, zoneid);
   2373 }
   2374 
   2375 /*
   2376  * Make visible to the zone that subset of the cpu information that would be
   2377  * initialized when a previously configured cpu is onlined.
   2378  */
   2379 void
   2380 cpu_visibility_online(cpu_t *cp, zone_t *zone)
   2381 {
   2382 	kstat_t *ksp;
   2383 	char name[sizeof ("cpu_stat") + 10];	/* enough for 32-bit cpuids */
   2384 	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
   2385 	processorid_t cpun;
   2386 
   2387 	ASSERT(MUTEX_HELD(&cpu_lock));
   2388 	ASSERT(pool_pset_enabled());
   2389 	ASSERT(cp != NULL);
   2390 	ASSERT(cpu_is_active(cp));
   2391 
   2392 	cpun = cp->cpu_id;
   2393 	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
   2394 		zone->zone_ncpus_online++;
   2395 		ASSERT(zone->zone_ncpus_online <= ncpus_online);
   2396 	}
   2397 	(void) snprintf(name, sizeof (name), "cpu_stat%d", cpun);
   2398 	if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES))
   2399 	    != NULL) {
   2400 		kstat_zone_add(ksp, zoneid);
   2401 		kstat_rele(ksp);
   2402 	}
   2403 	if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) {
   2404 		kstat_zone_add(ksp, zoneid);
   2405 		kstat_rele(ksp);
   2406 	}
   2407 	if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) {
   2408 		kstat_zone_add(ksp, zoneid);
   2409 		kstat_rele(ksp);
   2410 	}
   2411 	if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) !=
   2412 	    NULL) {
   2413 		kstat_zone_add(ksp, zoneid);
   2414 		kstat_rele(ksp);
   2415 	}
   2416 }
   2417 
   2418 /*
   2419  * Update relevant kstats such that cpu is now visible to processes
   2420  * executing in specified zone.
   2421  */
   2422 void
   2423 cpu_visibility_add(cpu_t *cp, zone_t *zone)
   2424 {
   2425 	cpu_visibility_configure(cp, zone);
   2426 	if (cpu_is_active(cp))
   2427 		cpu_visibility_online(cp, zone);
   2428 }
   2429 
   2430 /*
   2431  * Make invisible to the zone that subset of the cpu information that would be
   2432  * torn down when a previously offlined cpu is unconfigured.
   2433  */
   2434 void
   2435 cpu_visibility_unconfigure(cpu_t *cp, zone_t *zone)
   2436 {
   2437 	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
   2438 
   2439 	ASSERT(MUTEX_HELD(&cpu_lock));
   2440 	ASSERT(pool_pset_enabled());
   2441 	ASSERT(cp != NULL);
   2442 
   2443 	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
   2444 		ASSERT(zone->zone_ncpus != 0);
   2445 		zone->zone_ncpus--;
   2446 	}
   2447 	if (cp->cpu_info_kstat)
   2448 		kstat_zone_remove(cp->cpu_info_kstat, zoneid);
   2449 }
   2450 
   2451 /*
   2452  * Make invisible to the zone that subset of the cpu information that would be
   2453  * torn down when a cpu is offlined (but still configured).
   2454  */
   2455 void
   2456 cpu_visibility_offline(cpu_t *cp, zone_t *zone)
   2457 {
   2458 	kstat_t *ksp;
   2459 	char name[sizeof ("cpu_stat") + 10];	/* enough for 32-bit cpuids */
   2460 	zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
   2461 	processorid_t cpun;
   2462 
   2463 	ASSERT(MUTEX_HELD(&cpu_lock));
   2464 	ASSERT(pool_pset_enabled());
   2465 	ASSERT(cp != NULL);
   2466 	ASSERT(cpu_is_active(cp));
   2467 
   2468 	cpun = cp->cpu_id;
   2469 	if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) {
   2470 		ASSERT(zone->zone_ncpus_online != 0);
   2471 		zone->zone_ncpus_online--;
   2472 	}
   2473 
   2474 	if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) !=
   2475 	    NULL) {
   2476 		kstat_zone_remove(ksp, zoneid);
   2477 		kstat_rele(ksp);
   2478 	}
   2479 	if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) {
   2480 		kstat_zone_remove(ksp, zoneid);
   2481 		kstat_rele(ksp);
   2482 	}
   2483 	if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) {
   2484 		kstat_zone_remove(ksp, zoneid);
   2485 		kstat_rele(ksp);
   2486 	}
   2487 	(void) snprintf(name, sizeof (name), "cpu_stat%d", cpun);
   2488 	if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES))
   2489 	    != NULL) {
   2490 		kstat_zone_remove(ksp, zoneid);
   2491 		kstat_rele(ksp);
   2492 	}
   2493 }
   2494 
   2495 /*
   2496  * Update relevant kstats such that cpu is no longer visible to processes
   2497  * executing in specified zone.
   2498  */
   2499 void
   2500 cpu_visibility_remove(cpu_t *cp, zone_t *zone)
   2501 {
   2502 	if (cpu_is_active(cp))
   2503 		cpu_visibility_offline(cp, zone);
   2504 	cpu_visibility_unconfigure(cp, zone);
   2505 }
   2506 
   2507 /*
   2508  * Bind a thread to a CPU as requested.
   2509  */
   2510 int
   2511 cpu_bind_thread(kthread_id_t tp, processorid_t bind, processorid_t *obind,
   2512     int *error)
   2513 {
   2514 	processorid_t	binding;
   2515 	cpu_t		*cp = NULL;
   2516 
   2517 	ASSERT(MUTEX_HELD(&cpu_lock));
   2518 	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
   2519 
   2520 	thread_lock(tp);
   2521 
   2522 	/*
   2523 	 * Record old binding, but change the obind, which was initialized
   2524 	 * to PBIND_NONE, only if this thread has a binding.  This avoids
   2525 	 * reporting PBIND_NONE for a process when some LWPs are bound.
   2526 	 */
   2527 	binding = tp->t_bind_cpu;
   2528 	if (binding != PBIND_NONE)
   2529 		*obind = binding;	/* record old binding */
   2530 
   2531 	switch (bind) {
   2532 	case PBIND_QUERY:
   2533 		/* Just return the old binding */
   2534 		thread_unlock(tp);
   2535 		return (0);
   2536 
   2537 	case PBIND_QUERY_TYPE:
   2538 		/* Return the binding type */
   2539 		*obind = TB_CPU_IS_SOFT(tp) ? PBIND_SOFT : PBIND_HARD;
   2540 		thread_unlock(tp);
   2541 		return (0);
   2542 
   2543 	case PBIND_SOFT:
   2544 		/*
   2545 		 *  Set soft binding for this thread and return the actual
   2546 		 *  binding
   2547 		 */
   2548 		TB_CPU_SOFT_SET(tp);
   2549 		thread_unlock(tp);
   2550 		return (0);
   2551 
   2552 	case PBIND_HARD:
   2553 		/*
   2554 		 *  Set hard binding for this thread and return the actual
   2555 		 *  binding
   2556 		 */
   2557 		TB_CPU_HARD_SET(tp);
   2558 		thread_unlock(tp);
   2559 		return (0);
   2560 
   2561 	default:
   2562 		break;
   2563 	}
   2564 
   2565 	/*
   2566 	 * If this thread/LWP cannot be bound because of permission
   2567 	 * problems, just note that and return success so that the
   2568 	 * other threads/LWPs will be bound.  This is the way
   2569 	 * processor_bind() is defined to work.
   2570 	 *
   2571 	 * Binding will get EPERM if the thread is of system class
   2572 	 * or hasprocperm() fails.
   2573 	 */
   2574 	if (tp->t_cid == 0 || !hasprocperm(tp->t_cred, CRED())) {
   2575 		*error = EPERM;
   2576 		thread_unlock(tp);
   2577 		return (0);
   2578 	}
   2579 
   2580 	binding = bind;
   2581 	if (binding != PBIND_NONE) {
   2582 		cp = cpu_get((processorid_t)binding);
   2583 		/*
   2584 		 * Make sure binding is valid and is in right partition.
   2585 		 */
   2586 		if (cp == NULL || tp->t_cpupart != cp->cpu_part) {
   2587 			*error = EINVAL;
   2588 			thread_unlock(tp);
   2589 			return (0);
   2590 		}
   2591 	}
   2592 	tp->t_bind_cpu = binding;	/* set new binding */
   2593 
   2594 	/*
   2595 	 * If there is no system-set reason for affinity, set
   2596 	 * the t_bound_cpu field to reflect the binding.
   2597 	 */
   2598 	if (tp->t_affinitycnt == 0) {
   2599 		if (binding == PBIND_NONE) {
   2600 			/*
   2601 			 * We may need to adjust disp_max_unbound_pri
   2602 			 * since we're becoming unbound.
   2603 			 */
   2604 			disp_adjust_unbound_pri(tp);
   2605 
   2606 			tp->t_bound_cpu = NULL;	/* set new binding */
   2607 
   2608 			/*
   2609 			 * Move thread to lgroup with strongest affinity
   2610 			 * after unbinding
   2611 			 */
   2612 			if (tp->t_lgrp_affinity)
   2613 				lgrp_move_thread(tp,
   2614 				    lgrp_choose(tp, tp->t_cpupart), 1);
   2615 
   2616 			if (tp->t_state == TS_ONPROC &&
   2617 			    tp->t_cpu->cpu_part != tp->t_cpupart)
   2618 				cpu_surrender(tp);
   2619 		} else {
   2620 			lpl_t	*lpl;
   2621 
   2622 			tp->t_bound_cpu = cp;
   2623 			ASSERT(cp->cpu_lpl != NULL);
   2624 
   2625 			/*
   2626 			 * Set home to lgroup with most affinity containing CPU
   2627 			 * that thread is being bound or minimum bounding
   2628 			 * lgroup if no affinities set
   2629 			 */
   2630 			if (tp->t_lgrp_affinity)
   2631 				lpl = lgrp_affinity_best(tp, tp->t_cpupart,
   2632 				    LGRP_NONE, B_FALSE);
   2633 			else
   2634 				lpl = cp->cpu_lpl;
   2635 
   2636 			if (tp->t_lpl != lpl) {
   2637 				/* can't grab cpu_lock */
   2638 				lgrp_move_thread(tp, lpl, 1);
   2639 			}
   2640 
   2641 			/*
   2642 			 * Make the thread switch to the bound CPU.
   2643 			 * If the thread is runnable, we need to
   2644 			 * requeue it even if t_cpu is already set
   2645 			 * to the right CPU, since it may be on a
   2646 			 * kpreempt queue and need to move to a local
   2647 			 * queue.  We could check t_disp_queue to
   2648 			 * avoid unnecessary overhead if it's already
   2649 			 * on the right queue, but since this isn't
   2650 			 * a performance-critical operation it doesn't
   2651 			 * seem worth the extra code and complexity.
   2652 			 *
   2653 			 * If the thread is weakbound to the cpu then it will
   2654 			 * resist the new binding request until the weak
   2655 			 * binding drops.  The cpu_surrender or requeueing
   2656 			 * below could be skipped in such cases (since it
   2657 			 * will have no effect), but that would require
   2658 			 * thread_allowmigrate to acquire thread_lock so
   2659 			 * we'll take the very occasional hit here instead.
   2660 			 */
   2661 			if (tp->t_state == TS_ONPROC) {
   2662 				cpu_surrender(tp);
   2663 			} else if (tp->t_state == TS_RUN) {
   2664 				cpu_t *ocp = tp->t_cpu;
   2665 
   2666 				(void) dispdeq(tp);
   2667 				setbackdq(tp);
   2668 				/*
   2669 				 * Either on the bound CPU's disp queue now,
   2670 				 * or swapped out or on the swap queue.
   2671 				 */
   2672 				ASSERT(tp->t_disp_queue == cp->cpu_disp ||
   2673 				    tp->t_weakbound_cpu == ocp ||
   2674 				    (tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ))
   2675 				    != TS_LOAD);
   2676 			}
   2677 		}
   2678 	}
   2679 
   2680 	/*
   2681 	 * Our binding has changed; set TP_CHANGEBIND.
   2682 	 */
   2683 	tp->t_proc_flag |= TP_CHANGEBIND;
   2684 	aston(tp);
   2685 
   2686 	thread_unlock(tp);
   2687 
   2688 	return (0);
   2689 }
   2690 
   2691 #if CPUSET_WORDS > 1
   2692 
   2693 /*
   2694  * Functions for implementing cpuset operations when a cpuset is more
   2695  * than one word.  On platforms where a cpuset is a single word these
   2696  * are implemented as macros in cpuvar.h.
   2697  */
   2698 
   2699 void
   2700 cpuset_all(cpuset_t *s)
   2701 {
   2702 	int i;
   2703 
   2704 	for (i = 0; i < CPUSET_WORDS; i++)
   2705 		s->cpub[i] = ~0UL;
   2706 }
   2707 
   2708 void
   2709 cpuset_all_but(cpuset_t *s, uint_t cpu)
   2710 {
   2711 	cpuset_all(s);
   2712 	CPUSET_DEL(*s, cpu);
   2713 }
   2714 
   2715 void
   2716 cpuset_only(cpuset_t *s, uint_t cpu)
   2717 {
   2718 	CPUSET_ZERO(*s);
   2719 	CPUSET_ADD(*s, cpu);
   2720 }
   2721 
   2722 int
   2723 cpuset_isnull(cpuset_t *s)
   2724 {
   2725 	int i;
   2726 
   2727 	for (i = 0; i < CPUSET_WORDS; i++)
   2728 		if (s->cpub[i] != 0)
   2729 			return (0);
   2730 	return (1);
   2731 }
   2732 
   2733 int
   2734 cpuset_cmp(cpuset_t *s1, cpuset_t *s2)
   2735 {
   2736 	int i;
   2737 
   2738 	for (i = 0; i < CPUSET_WORDS; i++)
   2739 		if (s1->cpub[i] != s2->cpub[i])
   2740 			return (0);
   2741 	return (1);
   2742 }
   2743 
   2744 uint_t
   2745 cpuset_find(cpuset_t *s)
   2746 {
   2747 
   2748 	uint_t	i;
   2749 	uint_t	cpu = (uint_t)-1;
   2750 
   2751 	/*
   2752 	 * Find a cpu in the cpuset
   2753 	 */
   2754 	for (i = 0; i < CPUSET_WORDS; i++) {
   2755 		cpu = (uint_t)(lowbit(s->cpub[i]) - 1);
   2756 		if (cpu != (uint_t)-1) {
   2757 			cpu += i * BT_NBIPUL;
   2758 			break;
   2759 		}
   2760 	}
   2761 	return (cpu);
   2762 }
   2763 
   2764 void
   2765 cpuset_bounds(cpuset_t *s, uint_t *smallestid, uint_t *largestid)
   2766 {
   2767 	int	i, j;
   2768 	uint_t	bit;
   2769 
   2770 	/*
   2771 	 * First, find the smallest cpu id in the set.
   2772 	 */
   2773 	for (i = 0; i < CPUSET_WORDS; i++) {
   2774 		if (s->cpub[i] != 0) {
   2775 			bit = (uint_t)(lowbit(s->cpub[i]) - 1);
   2776 			ASSERT(bit != (uint_t)-1);
   2777 			*smallestid = bit + (i * BT_NBIPUL);
   2778 
   2779 			/*
   2780 			 * Now find the largest cpu id in
   2781 			 * the set and return immediately.
   2782 			 * Done in an inner loop to avoid
   2783 			 * having to break out of the first
   2784 			 * loop.
   2785 			 */
   2786 			for (j = CPUSET_WORDS - 1; j >= i; j--) {
   2787 				if (s->cpub[j] != 0) {
   2788 					bit = (uint_t)(highbit(s->cpub[j]) - 1);
   2789 					ASSERT(bit != (uint_t)-1);
   2790 					*largestid = bit + (j * BT_NBIPUL);
   2791 					ASSERT(*largestid >= *smallestid);
   2792 					return;
   2793 				}
   2794 			}
   2795 
   2796 			/*
   2797 			 * If this code is reached, a
   2798 			 * smallestid was found, but not a
   2799 			 * largestid. The cpuset must have
   2800 			 * been changed during the course
   2801 			 * of this function call.
   2802 			 */
   2803 			ASSERT(0);
   2804 		}
   2805 	}
   2806 	*smallestid = *largestid = CPUSET_NOTINSET;
   2807 }
   2808 
   2809 #endif	/* CPUSET_WORDS */
   2810 
   2811 /*
   2812  * Unbind threads bound to specified CPU.
   2813  *
   2814  * If `unbind_all_threads' is true, unbind all user threads bound to a given
   2815  * CPU. Otherwise unbind all soft-bound user threads.
   2816  */
   2817 int
   2818 cpu_unbind(processorid_t cpu, boolean_t unbind_all_threads)
   2819 {
   2820 	processorid_t obind;
   2821 	kthread_t *tp;
   2822 	int ret = 0;
   2823 	proc_t *pp;
   2824 	int err, berr = 0;
   2825 
   2826 	ASSERT(MUTEX_HELD(&cpu_lock));
   2827 
   2828 	mutex_enter(&pidlock);
   2829 	for (pp = practive; pp != NULL; pp = pp->p_next) {
   2830 		mutex_enter(&pp->p_lock);
   2831 		tp = pp->p_tlist;
   2832 		/*
   2833 		 * Skip zombies, kernel processes, and processes in
   2834 		 * other zones, if called from a non-global zone.
   2835 		 */
   2836 		if (tp == NULL || (pp->p_flag & SSYS) ||
   2837 		    !HASZONEACCESS(curproc, pp->p_zone->zone_id)) {
   2838 			mutex_exit(&pp->p_lock);
   2839 			continue;
   2840 		}
   2841 		do {
   2842 			if (tp->t_bind_cpu != cpu)
   2843 				continue;
   2844 			/*
   2845 			 * Skip threads with hard binding when
   2846 			 * `unbind_all_threads' is not specified.
   2847 			 */
   2848 			if (!unbind_all_threads && TB_CPU_IS_HARD(tp))
   2849 				continue;
   2850 			err = cpu_bind_thread(tp, PBIND_NONE, &obind, &berr);
   2851 			if (ret == 0)
   2852 				ret = err;
   2853 		} while ((tp = tp->t_forw) != pp->p_tlist);
   2854 		mutex_exit(&pp->p_lock);
   2855 	}
   2856 	mutex_exit(&pidlock);
   2857 	if (ret == 0)
   2858 		ret = berr;
   2859 	return (ret);
   2860 }
   2861 
   2862 
/*
 * Destroy all remaining bound threads on a cpu.
 *
 * Walks the circular t_next list starting at curthread.  Every thread
 * whose t_bound_cpu is `cp' is unlinked from that list, marked TS_FREE
 * and collected on a private list; once pidlock has been dropped the
 * collected threads are passed to thread_free().
 */
void
cpu_destroy_bound_threads(cpu_t *cp)
{
	extern id_t syscid;
	register kthread_id_t	t, tlist, tnext;

	/*
	 * Destroy all remaining bound threads on the cpu.  This
	 * should include both the interrupt threads and the idle thread.
	 * This requires some care, since we need to traverse the
	 * thread list with the pidlock mutex locked, but thread_free
	 * also locks the pidlock mutex.  So, we collect the threads
	 * we're going to reap in a list headed by "tlist", then we
	 * unlock the pidlock mutex and traverse the tlist list,
	 * doing thread_free's on the threads.  Simple, n'est-ce pas?
	 * Also, this depends on thread_free not mucking with the
	 * t_next and t_prev links of the thread.
	 */

	if ((t = curthread) != NULL) {

		tlist = NULL;
		mutex_enter(&pidlock);
		do {
			/*
			 * Remember the successor first: t_next is reused
			 * below as the link of the private "tlist".
			 */
			tnext = t->t_next;
			if (t->t_bound_cpu == cp) {

				/*
				 * We've found a bound thread, carefully unlink
				 * it out of the thread list, and add it to
				 * our "tlist".	 We "know" we don't have to
				 * worry about unlinking curthread (the thread
				 * that is executing this code).
				 */
				t->t_next->t_prev = t->t_prev;
				t->t_prev->t_next = t->t_next;
				t->t_next = tlist;
				tlist = t;
				ASSERT(t->t_cid == syscid);
				/* wake up anyone blocked in thread_join */
				cv_broadcast(&t->t_joincv);
				/*
				 * t_lwp set by interrupt threads and not
				 * cleared.
				 */
				t->t_lwp = NULL;
				/*
				 * Pause and idle threads always have
				 * t_state set to TS_ONPROC.
				 */
				t->t_state = TS_FREE;
				t->t_prev = NULL;	/* Just in case */
			}

		} while ((t = tnext) != curthread);

		mutex_exit(&pidlock);

		mutex_sync();
		/*
		 * pidlock is no longer held, so thread_free() can take it.
		 * Fetch the next pointer before each thread is freed.
		 */
		for (t = tlist; t != NULL; t = tnext) {
			tnext = t->t_next;
			thread_free(t);
		}
	}
}
   2931 
   2932 /*
   2933  * Update the cpu_supp_freqs of this cpu. This information is returned
   2934  * as part of cpu_info kstats. If the cpu_info_kstat exists already, then
   2935  * maintain the kstat data size.
   2936  */
   2937 void
   2938 cpu_set_supp_freqs(cpu_t *cp, const char *freqs)
   2939 {
   2940 	char clkstr[sizeof ("18446744073709551615") + 1]; /* ui64 MAX */
   2941 	const char *lfreqs = clkstr;
   2942 	boolean_t kstat_exists = B_FALSE;
   2943 	kstat_t *ksp;
   2944 	size_t len;
   2945 
   2946 	/*
   2947 	 * A NULL pointer means we only support one speed.
   2948 	 */
   2949 	if (freqs == NULL)
   2950 		(void) snprintf(clkstr, sizeof (clkstr), "%"PRIu64,
   2951 		    cp->cpu_curr_clock);
   2952 	else
   2953 		lfreqs = freqs;
   2954 
   2955 	/*
   2956 	 * Make sure the frequency doesn't change while a snapshot is
   2957 	 * going on. Of course, we only need to worry about this if
   2958 	 * the kstat exists.
   2959 	 */
   2960 	if ((ksp = cp->cpu_info_kstat) != NULL) {
   2961 		mutex_enter(ksp->ks_lock);
   2962 		kstat_exists = B_TRUE;
   2963 	}
   2964 
   2965 	/*
   2966 	 * Free any previously allocated string and if the kstat
   2967 	 * already exists, then update its data size.
   2968 	 */
   2969 	if (cp->cpu_supp_freqs != NULL) {
   2970 		len = strlen(cp->cpu_supp_freqs) + 1;
   2971 		kmem_free(cp->cpu_supp_freqs, len);
   2972 		if (kstat_exists)
   2973 			ksp->ks_data_size -= len;
   2974 	}
   2975 
   2976 	/*
   2977 	 * Allocate the new string and set the pointer.
   2978 	 */
   2979 	len = strlen(lfreqs) + 1;
   2980 	cp->cpu_supp_freqs = kmem_alloc(len, KM_SLEEP);
   2981 	(void) strcpy(cp->cpu_supp_freqs, lfreqs);
   2982 
   2983 	/*
   2984 	 * If the kstat already exists then update the data size and
   2985 	 * free the lock.
   2986 	 */
   2987 	if (kstat_exists) {
   2988 		ksp->ks_data_size += len;
   2989 		mutex_exit(ksp->ks_lock);
   2990 	}
   2991 }
   2992 
/*
 * Indicate the current CPU's clock frequency (in Hz).
 * The calling context must be such that CPU references are safe.
 *
 * Stores `new_clk' in CPU->cpu_curr_clock and fires the cpu-change-speed
 * DTrace probe with the cpu id, the previous value and the new value.
 */
void
cpu_set_curr_clock(uint64_t new_clk)
{
	uint64_t old_clk;

	/* Capture the old value first so the probe can report both. */
	old_clk = CPU->cpu_curr_clock;
	CPU->cpu_curr_clock = new_clk;

	/*
	 * The cpu-change-speed DTrace probe exports the frequency in Hz
	 */
	DTRACE_PROBE3(cpu__change__speed, processorid_t, CPU->cpu_id,
	    uint64_t, old_clk, uint64_t, new_clk);
}
   3011 
/*
 * processor_info(2) and p_online(2) status support functions
 *   The constants returned by cpu_get_state() and cpu_get_state_str() are
 *   for use in communicating processor state information to userland.  Kernel
 *   subsystems should only be using the cpu_flags value directly.  Subsystems
 *   modifying cpu_flags should record the state change via a call to
 *   cpu_set_state().
 */
   3020 
/*
 * Update the pi_state of this CPU.  This function provides the CPU status for
 * the information returned by processor_info(2).
 *
 * The caller must hold cpu_lock.
 */
void
cpu_set_state(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	/* Recompute the user-visible state from the current cpu_flags. */
	cpu->cpu_type_info.pi_state = cpu_get_state(cpu);
	/* Record when this state was entered. */
	cpu->cpu_state_begin = gethrestime_sec();
	/* Stamp pool_cpu_mod with the current high-resolution time. */
	pool_cpu_mod = gethrtime();
}
   3033 
   3034 /*
   3035  * Return offline/online/other status for the indicated CPU.  Use only for
   3036  * communication with user applications; cpu_flags provides the in-kernel
   3037  * interface.
   3038  */
   3039 int
   3040 cpu_get_state(cpu_t *cpu)
   3041 {
   3042 	ASSERT(MUTEX_HELD(&cpu_lock));
   3043 	if (cpu->cpu_flags & CPU_POWEROFF)
   3044 		return (P_POWEROFF);
   3045 	else if (cpu->cpu_flags & CPU_FAULTED)
   3046 		return (P_FAULTED);
   3047 	else if (cpu->cpu_flags & CPU_SPARE)
   3048 		return (P_SPARE);
   3049 	else if ((cpu->cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY)
   3050 		return (P_OFFLINE);
   3051 	else if (cpu->cpu_flags & CPU_ENABLE)
   3052 		return (P_ONLINE);
   3053 	else
   3054 		return (P_NOINTR);
   3055 }
   3056 
   3057 /*
   3058  * Return processor_info(2) state as a string.
   3059  */
   3060 const char *
   3061 cpu_get_state_str(cpu_t *cpu)
   3062 {
   3063 	const char *string;
   3064 
   3065 	switch (cpu_get_state(cpu)) {
   3066 	case P_ONLINE:
   3067 		string = PS_ONLINE;
   3068 		break;
   3069 	case P_POWEROFF:
   3070 		string = PS_POWEROFF;
   3071 		break;
   3072 	case P_NOINTR:
   3073 		string = PS_NOINTR;
   3074 		break;
   3075 	case P_SPARE:
   3076 		string = PS_SPARE;
   3077 		break;
   3078 	case P_FAULTED:
   3079 		string = PS_FAULTED;
   3080 		break;
   3081 	case P_OFFLINE:
   3082 		string = PS_OFFLINE;
   3083 		break;
   3084 	default:
   3085 		string = "unknown";
   3086 		break;
   3087 	}
   3088 	return (string);
   3089 }
   3090 
   3091 /*
   3092  * Export this CPU's statistics (cpu_stat_t and cpu_stats_t) as raw and named
   3093  * kstats, respectively.  This is done when a CPU is initialized or placed
   3094  * online via p_online(2).
   3095  */
   3096 static void
   3097 cpu_stats_kstat_create(cpu_t *cp)
   3098 {
   3099 	int 	instance = cp->cpu_id;
   3100 	char 	*module = "cpu";
   3101 	char 	*class = "misc";
   3102 	kstat_t	*ksp;
   3103 	zoneid_t zoneid;
   3104 
   3105 	ASSERT(MUTEX_HELD(&cpu_lock));
   3106 
   3107 	if (pool_pset_enabled())
   3108 		zoneid = GLOBAL_ZONEID;
   3109 	else
   3110 		zoneid = ALL_ZONES;
   3111 	/*
   3112 	 * Create named kstats
   3113 	 */
   3114 #define	CPU_STATS_KS_CREATE(name, tsize, update_func)                    \
   3115 	ksp = kstat_create_zone(module, instance, (name), class,         \
   3116 	    KSTAT_TYPE_NAMED, (tsize) / sizeof (kstat_named_t), 0,       \
   3117 	    zoneid);                                                     \
   3118 	if (ksp != NULL) {                                               \
   3119 		ksp->ks_private = cp;                                    \
   3120 		ksp->ks_update = (update_func);                          \
   3121 		kstat_install(ksp);                                      \
   3122 	} else                                                           \
   3123 		cmn_err(CE_WARN, "cpu: unable to create %s:%d:%s kstat", \
   3124 		    module, instance, (name));
   3125 
   3126 	CPU_STATS_KS_CREATE("sys", sizeof (cpu_sys_stats_ks_data_template),
   3127 	    cpu_sys_stats_ks_update);
   3128 	CPU_STATS_KS_CREATE("vm", sizeof (cpu_vm_stats_ks_data_template),
   3129 	    cpu_vm_stats_ks_update);
   3130 
   3131 	/*
   3132 	 * Export the familiar cpu_stat_t KSTAT_TYPE_RAW kstat.
   3133 	 */
   3134 	ksp = kstat_create_zone("cpu_stat", cp->cpu_id, NULL,
   3135 	    "misc", KSTAT_TYPE_RAW, sizeof (cpu_stat_t), 0, zoneid);
   3136 	if (ksp != NULL) {
   3137 		ksp->ks_update = cpu_stat_ks_update;
   3138 		ksp->ks_private = cp;
   3139 		kstat_install(ksp);
   3140 	}
   3141 }
   3142 
   3143 static void
   3144 cpu_stats_kstat_destroy(cpu_t *cp)
   3145 {
   3146 	char ks_name[KSTAT_STRLEN];
   3147 
   3148 	(void) sprintf(ks_name, "cpu_stat%d", cp->cpu_id);
   3149 	kstat_delete_byname("cpu_stat", cp->cpu_id, ks_name);
   3150 
   3151 	kstat_delete_byname("cpu", cp->cpu_id, "sys");
   3152 	kstat_delete_byname("cpu", cp->cpu_id, "vm");
   3153 }
   3154 
   3155 static int
   3156 cpu_sys_stats_ks_update(kstat_t *ksp, int rw)
   3157 {
   3158 	cpu_t *cp = (cpu_t *)ksp->ks_private;
   3159 	struct cpu_sys_stats_ks_data *csskd;
   3160 	cpu_sys_stats_t *css;
   3161 	hrtime_t msnsecs[NCMSTATES];
   3162 	int	i;
   3163 
   3164 	if (rw == KSTAT_WRITE)
   3165 		return (EACCES);
   3166 
   3167 	csskd = ksp->ks_data;
   3168 	css = &cp->cpu_stats.sys;
   3169 
   3170 	/*
   3171 	 * Read CPU mstate, but compare with the last values we
   3172 	 * received to make sure that the returned kstats never
   3173 	 * decrease.
   3174 	 */
   3175 
   3176 	get_cpu_mstate(cp, msnsecs);
   3177 	if (csskd->cpu_nsec_idle.value.ui64 > msnsecs[CMS_IDLE])
   3178 		msnsecs[CMS_IDLE] = csskd->cpu_nsec_idle.value.ui64;
   3179 	if (csskd->cpu_nsec_user.value.ui64 > msnsecs[CMS_USER])
   3180 		msnsecs[CMS_USER] = csskd->cpu_nsec_user.value.ui64;
   3181 	if (csskd->cpu_nsec_kernel.value.ui64 > msnsecs[CMS_SYSTEM])
   3182 		msnsecs[CMS_SYSTEM] = csskd->cpu_nsec_kernel.value.ui64;
   3183 
   3184 	bcopy(&cpu_sys_stats_ks_data_template, ksp->ks_data,
   3185 	    sizeof (cpu_sys_stats_ks_data_template));
   3186 
   3187 	csskd->cpu_ticks_wait.value.ui64 = 0;
   3188 	csskd->wait_ticks_io.value.ui64 = 0;
   3189 
   3190 	csskd->cpu_nsec_idle.value.ui64 = msnsecs[CMS_IDLE];
   3191 	csskd->cpu_nsec_user.value.ui64 = msnsecs[CMS_USER];
   3192 	csskd->cpu_nsec_kernel.value.ui64 = msnsecs[CMS_SYSTEM];
   3193 	csskd->cpu_ticks_idle.value.ui64 =
   3194 	    NSEC_TO_TICK(csskd->cpu_nsec_idle.value.ui64);
   3195 	csskd->cpu_ticks_user.value.ui64 =
   3196 	    NSEC_TO_TICK(csskd->cpu_nsec_user.value.ui64);
   3197 	csskd->cpu_ticks_kernel.value.ui64 =
   3198 	    NSEC_TO_TICK(csskd->cpu_nsec_kernel.value.ui64);
   3199 	csskd->cpu_nsec_intr.value.ui64 = cp->cpu_intrlast;
   3200 	csskd->cpu_load_intr.value.ui64 = cp->cpu_intrload;
   3201 	csskd->bread.value.ui64 = css->bread;
   3202 	csskd->bwrite.value.ui64 = css->bwrite;
   3203 	csskd->lread.value.ui64 = css->lread;
   3204 	csskd->lwrite.value.ui64 = css->lwrite;
   3205 	csskd->phread.value.ui64 = css->phread;
   3206 	csskd->phwrite.value.ui64 = css->phwrite;
   3207 	csskd->pswitch.value.ui64 = css->pswitch;
   3208 	csskd->trap.value.ui64 = css->trap;
   3209 	csskd->intr.value.ui64 = 0;
   3210 	for (i = 0; i < PIL_MAX; i++)
   3211 		csskd->intr.value.ui64 += css->intr[i];
   3212 	csskd->syscall.value.ui64 = css->syscall;
   3213 	csskd->sysread.value.ui64 = css->sysread;
   3214 	csskd->syswrite.value.ui64 = css->syswrite;
   3215 	csskd->sysfork.value.ui64 = css->sysfork;
   3216 	csskd->sysvfork.value.ui64 = css->sysvfork;
   3217 	csskd->sysexec.value.ui64 = css->sysexec;
   3218 	csskd->readch.value.ui64 = css->readch;
   3219 	csskd->writech.value.ui64 = css->writech;
   3220 	csskd->rcvint.value.ui64 = css->rcvint;
   3221 	csskd->xmtint.value.ui64 = css->xmtint;
   3222 	csskd->mdmint.value.ui64 = css->mdmint;
   3223 	csskd->rawch.value.ui64 = css->rawch;
   3224 	csskd->canch.value.ui64 = css->canch;
   3225 	csskd->outch.value.ui64 = css->outch;
   3226 	csskd->msg.value.ui64 = css->msg;
   3227 	csskd->sema.value.ui64 = css->sema;
   3228 	csskd->namei.value.ui64 = css->namei;
   3229 	csskd->ufsiget.value.ui64 = css->ufsiget;
   3230 	csskd->ufsdirblk.value.ui64 = css->ufsdirblk;
   3231 	csskd->ufsipage.value.ui64 = css->ufsipage;
   3232 	csskd->ufsinopage.value.ui64 = css->ufsinopage;
   3233 	csskd->procovf.value.ui64 = css->procovf;
   3234 	csskd->intrthread.value.ui64 = 0;
   3235 	for (i = 0; i < LOCK_LEVEL - 1; i++)
   3236 		csskd->intrthread.value.ui64 += css->intr[i];
   3237 	csskd->intrblk.value.ui64 = css->intrblk;
   3238 	csskd->intrunpin.value.ui64 = css->intrunpin;
   3239 	csskd->idlethread.value.ui64 = css->idlethread;
   3240 	csskd->inv_swtch.value.ui64 = css->inv_swtch;
   3241 	csskd->nthreads.value.ui64 = css->nthreads;
   3242 	csskd->cpumigrate.value.ui64 = css->cpumigrate;
   3243 	csskd->xcalls.value.ui64 = css->xcalls;
   3244 	csskd->mutex_adenters.value.ui64 = css->mutex_adenters;
   3245 	csskd->rw_rdfails.value.ui64 = css->rw_rdfails;
   3246 	csskd->rw_wrfails.value.ui64 = css->rw_wrfails;
   3247 	csskd->modload.value.ui64 = css->modload;
   3248 	csskd->modunload.value.ui64 = css->modunload;
   3249 	csskd->bawrite.value.ui64 = css->bawrite;
   3250 	csskd->iowait.value.ui64 = css->iowait;
   3251 
   3252 	return (0);
   3253 }
   3254 
   3255 static int
   3256 cpu_vm_stats_ks_update(kstat_t *ksp, int rw)
   3257 {
   3258 	cpu_t *cp = (cpu_t *)ksp->ks_private;
   3259 	struct cpu_vm_stats_ks_data *cvskd;
   3260 	cpu_vm_stats_t *cvs;
   3261 
   3262 	if (rw == KSTAT_WRITE)
   3263 		return (EACCES);
   3264 
   3265 	cvs = &cp->cpu_stats.vm;
   3266 	cvskd = ksp->ks_data;
   3267 
   3268 	bcopy(&cpu_vm_stats_ks_data_template, ksp->ks_data,
   3269 	    sizeof (cpu_vm_stats_ks_data_template));
   3270 	cvskd->pgrec.value.ui64 = cvs->pgrec;
   3271 	cvskd->pgfrec.value.ui64 = cvs->pgfrec;
   3272 	cvskd->pgin.value.ui64 = cvs->pgin;
   3273 	cvskd->pgpgin.value.ui64 = cvs->pgpgin;
   3274 	cvskd->pgout.value.ui64 = cvs->pgout;
   3275 	cvskd->pgpgout.value.ui64 = cvs->pgpgout;
   3276 	cvskd->swapin.value.ui64 = cvs->swapin;
   3277 	cvskd->pgswapin.value.ui64 = cvs->pgswapin;
   3278 	cvskd->swapout.value.ui64 = cvs->swapout;
   3279 	cvskd->pgswapout.value.ui64 = cvs->pgswapout;
   3280 	cvskd->zfod.value.ui64 = cvs->zfod;
   3281 	cvskd->dfree.value.ui64 = cvs->dfree;
   3282 	cvskd->scan.value.ui64 = cvs->scan;
   3283 	cvskd->rev.value.ui64 = cvs->rev;
   3284 	cvskd->hat_fault.value.ui64 = cvs->hat_fault;
   3285 	cvskd->as_fault.value.ui64 = cvs->as_fault;
   3286 	cvskd->maj_fault.value.ui64 = cvs->maj_fault;
   3287 	cvskd->cow_fault.value.ui64 = cvs->cow_fault;
   3288 	cvskd->prot_fault.value.ui64 = cvs->prot_fault;
   3289 	cvskd->softlock.value.ui64 = cvs->softlock;
   3290 	cvskd->kernel_asflt.value.ui64 = cvs->kernel_asflt;
   3291 	cvskd->pgrrun.value.ui64 = cvs->pgrrun;
   3292 	cvskd->execpgin.value.ui64 = cvs->execpgin;
   3293 	cvskd->execpgout.value.ui64 = cvs->execpgout;
   3294 	cvskd->execfree.value.ui64 = cvs->execfree;
   3295 	cvskd->anonpgin.value.ui64 = cvs->anonpgin;
   3296 	cvskd->anonpgout.value.ui64 = cvs->anonpgout;
   3297 	cvskd->anonfree.value.ui64 = cvs->anonfree;
   3298 	cvskd->fspgin.value.ui64 = cvs->fspgin;
   3299 	cvskd->fspgout.value.ui64 = cvs->fspgout;
   3300 	cvskd->fsfree.value.ui64 = cvs->fsfree;
   3301 
   3302 	return (0);
   3303 }
   3304 
/*
 * kstat update routine for the raw cpu_stat_t kstat of a cpu.
 *
 * ksp->ks_data is the cpu_stat_t to fill and ksp->ks_private the cpu_t
 * the statistics are read from.  The kstat is read-only: a KSTAT_WRITE
 * request fails with EACCES.  Returns 0 on success.
 */
static int
cpu_stat_ks_update(kstat_t *ksp, int rw)
{
	cpu_stat_t *cso;
	cpu_t *cp;
	int i;
	hrtime_t msnsecs[NCMSTATES];

	cso = (cpu_stat_t *)ksp->ks_data;
	cp = (cpu_t *)ksp->ks_private;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	/*
	 * Read CPU mstate, but compare with the last values we
	 * received to make sure that the returned kstats never
	 * decrease.
	 */

	get_cpu_mstate(cp, msnsecs);
	/* Convert the nanosecond values to ticks in place. */
	msnsecs[CMS_IDLE] = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
	msnsecs[CMS_USER] = NSEC_TO_TICK(msnsecs[CMS_USER]);
	msnsecs[CMS_SYSTEM] = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
	/* Only ever raise the stored tick counts, never lower them. */
	if (cso->cpu_sysinfo.cpu[CPU_IDLE] < msnsecs[CMS_IDLE])
		cso->cpu_sysinfo.cpu[CPU_IDLE] = msnsecs[CMS_IDLE];
	if (cso->cpu_sysinfo.cpu[CPU_USER] < msnsecs[CMS_USER])
		cso->cpu_sysinfo.cpu[CPU_USER] = msnsecs[CMS_USER];
	if (cso->cpu_sysinfo.cpu[CPU_KERNEL] < msnsecs[CMS_SYSTEM])
		cso->cpu_sysinfo.cpu[CPU_KERNEL] = msnsecs[CMS_SYSTEM];
	/* Wait time is always reported as zero. */
	cso->cpu_sysinfo.cpu[CPU_WAIT] 	= 0;
	cso->cpu_sysinfo.wait[W_IO] 	= 0;
	cso->cpu_sysinfo.wait[W_SWAP]	= 0;
	cso->cpu_sysinfo.wait[W_PIO]	= 0;
	cso->cpu_sysinfo.bread 		= CPU_STATS(cp, sys.bread);
	cso->cpu_sysinfo.bwrite 	= CPU_STATS(cp, sys.bwrite);
	cso->cpu_sysinfo.lread 		= CPU_STATS(cp, sys.lread);
	cso->cpu_sysinfo.lwrite 	= CPU_STATS(cp, sys.lwrite);
	cso->cpu_sysinfo.phread 	= CPU_STATS(cp, sys.phread);
	cso->cpu_sysinfo.phwrite 	= CPU_STATS(cp, sys.phwrite);
	cso->cpu_sysinfo.pswitch 	= CPU_STATS(cp, sys.pswitch);
	cso->cpu_sysinfo.trap 		= CPU_STATS(cp, sys.trap);
	/* Total interrupts: the sum over all PIL_MAX levels. */
	cso->cpu_sysinfo.intr		= 0;
	for (i = 0; i < PIL_MAX; i++)
		cso->cpu_sysinfo.intr += CPU_STATS(cp, sys.intr[i]);
	cso->cpu_sysinfo.syscall	= CPU_STATS(cp, sys.syscall);
	cso->cpu_sysinfo.sysread	= CPU_STATS(cp, sys.sysread);
	cso->cpu_sysinfo.syswrite	= CPU_STATS(cp, sys.syswrite);
	cso->cpu_sysinfo.sysfork	= CPU_STATS(cp, sys.sysfork);
	cso->cpu_sysinfo.sysvfork	= CPU_STATS(cp, sys.sysvfork);
	cso->cpu_sysinfo.sysexec	= CPU_STATS(cp, sys.sysexec);
	cso->cpu_sysinfo.readch		= CPU_STATS(cp, sys.readch);
	cso->cpu_sysinfo.writech	= CPU_STATS(cp, sys.writech);
	cso->cpu_sysinfo.rcvint		= CPU_STATS(cp, sys.rcvint);
	cso->cpu_sysinfo.xmtint		= CPU_STATS(cp, sys.xmtint);
	cso->cpu_sysinfo.mdmint		= CPU_STATS(cp, sys.mdmint);
	cso->cpu_sysinfo.rawch		= CPU_STATS(cp, sys.rawch);
	cso->cpu_sysinfo.canch		= CPU_STATS(cp, sys.canch);
	cso->cpu_sysinfo.outch		= CPU_STATS(cp, sys.outch);
	cso->cpu_sysinfo.msg		= CPU_STATS(cp, sys.msg);
	cso->cpu_sysinfo.sema		= CPU_STATS(cp, sys.sema);
	cso->cpu_sysinfo.namei		= CPU_STATS(cp, sys.namei);
	cso->cpu_sysinfo.ufsiget	= CPU_STATS(cp, sys.ufsiget);
	cso->cpu_sysinfo.ufsdirblk	= CPU_STATS(cp, sys.ufsdirblk);
	cso->cpu_sysinfo.ufsipage	= CPU_STATS(cp, sys.ufsipage);
	cso->cpu_sysinfo.ufsinopage	= CPU_STATS(cp, sys.ufsinopage);
	/* These two overflow counters are always reported as zero. */
	cso->cpu_sysinfo.inodeovf	= 0;
	cso->cpu_sysinfo.fileovf	= 0;
	cso->cpu_sysinfo.procovf	= CPU_STATS(cp, sys.procovf);
	/* Only the first LOCK_LEVEL - 1 entries count toward intrthread. */
	cso->cpu_sysinfo.intrthread	= 0;
	for (i = 0; i < LOCK_LEVEL - 1; i++)
		cso->cpu_sysinfo.intrthread += CPU_STATS(cp, sys.intr[i]);
	cso->cpu_sysinfo.intrblk	= CPU_STATS(cp, sys.intrblk);
	cso->cpu_sysinfo.idlethread	= CPU_STATS(cp, sys.idlethread);
	cso->cpu_sysinfo.inv_swtch	= CPU_STATS(cp, sys.inv_swtch);
	cso->cpu_sysinfo.nthreads	= CPU_STATS(cp, sys.nthreads);
	cso->cpu_sysinfo.cpumigrate	= CPU_STATS(cp, sys.cpumigrate);
	cso->cpu_sysinfo.xcalls		= CPU_STATS(cp, sys.xcalls);
	cso->cpu_sysinfo.mutex_adenters	= CPU_STATS(cp, sys.mutex_adenters);
	cso->cpu_sysinfo.rw_rdfails	= CPU_STATS(cp, sys.rw_rdfails);
	cso->cpu_sysinfo.rw_wrfails	= CPU_STATS(cp, sys.rw_wrfails);
	cso->cpu_sysinfo.modload	= CPU_STATS(cp, sys.modload);
	cso->cpu_sysinfo.modunload	= CPU_STATS(cp, sys.modunload);
	cso->cpu_sysinfo.bawrite	= CPU_STATS(cp, sys.bawrite);
	/* The remaining cpu_sysinfo fields are always reported as zero. */
	cso->cpu_sysinfo.rw_enters	= 0;
	cso->cpu_sysinfo.win_uo_cnt	= 0;
	cso->cpu_sysinfo.win_uu_cnt	= 0;
	cso->cpu_sysinfo.win_so_cnt	= 0;
	cso->cpu_sysinfo.win_su_cnt	= 0;
	cso->cpu_sysinfo.win_suo_cnt	= 0;

	/* Of the wait counters only iowait is taken from the cpu. */
	cso->cpu_syswait.iowait		= CPU_STATS(cp, sys.iowait);
	cso->cpu_syswait.swap		= 0;
	cso->cpu_syswait.physio		= 0;

	/* Virtual memory statistics, copied field by field. */
	cso->cpu_vminfo.pgrec		= CPU_STATS(cp, vm.pgrec);
	cso->cpu_vminfo.pgfrec		= CPU_STATS(cp, vm.pgfrec);
	cso->cpu_vminfo.pgin		= CPU_STATS(cp, vm.pgin);
	cso->cpu_vminfo.pgpgin		= CPU_STATS(cp, vm.pgpgin);
	cso->cpu_vminfo.pgout		= CPU_STATS(cp, vm.pgout);
	cso->cpu_vminfo.pgpgout		= CPU_STATS(cp, vm.pgpgout);
	cso->cpu_vminfo.swapin		= CPU_STATS(cp, vm.swapin);
	cso->cpu_vminfo.pgswapin	= CPU_STATS(cp, vm.pgswapin);
	cso->cpu_vminfo.swapout		= CPU_STATS(cp, vm.swapout);
	cso->cpu_vminfo.pgswapout	= CPU_STATS(cp, vm.pgswapout);
	cso->cpu_vminfo.zfod		= CPU_STATS(cp, vm.zfod);
	cso->cpu_vminfo.dfree		= CPU_STATS(cp, vm.dfree);
	cso->cpu_vminfo.scan		= CPU_STATS(cp, vm.scan);
	cso->cpu_vminfo.rev		= CPU_STATS(cp, vm.rev);
	cso->cpu_vminfo.hat_fault	= CPU_STATS(cp, vm.hat_fault);
	cso->cpu_vminfo.as_fault	= CPU_STATS(cp, vm.as_fault);
	cso->cpu_vminfo.maj_fault	= CPU_STATS(cp, vm.maj_fault);
	cso->cpu_vminfo.cow_fault	= CPU_STATS(cp, vm.cow_fault);
	cso->cpu_vminfo.prot_fault	= CPU_STATS(cp, vm.prot_fault);
	cso->cpu_vminfo.softlock	= CPU_STATS(cp, vm.softlock);
	cso->cpu_vminfo.kernel_asflt	= CPU_STATS(cp, vm.kernel_asflt);
	cso->cpu_vminfo.pgrrun		= CPU_STATS(cp, vm.pgrrun);
	cso->cpu_vminfo.execpgin	= CPU_STATS(cp, vm.execpgin);
	cso->cpu_vminfo.execpgout	= CPU_STATS(cp, vm.execpgout);
	cso->cpu_vminfo.execfree	= CPU_STATS(cp, vm.execfree);
	cso->cpu_vminfo.anonpgin	= CPU_STATS(cp, vm.anonpgin);
	cso->cpu_vminfo.anonpgout	= CPU_STATS(cp, vm.anonpgout);
	cso->cpu_vminfo.anonfree	= CPU_STATS(cp, vm.anonfree);
	cso->cpu_vminfo.fspgin		= CPU_STATS(cp, vm.fspgin);
	cso->cpu_vminfo.fspgout		= CPU_STATS(cp, vm.fspgout);
	cso->cpu_vminfo.fsfree		= CPU_STATS(cp, vm.fsfree);

	return (0);
}
   3434