Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/systm.h>
     27 #include <sys/archsystm.h>
     28 #include <sys/machsystm.h>
     29 #include <sys/cpuvar.h>
     30 #include <sys/intreg.h>
     31 #include <sys/x_call.h>
     32 #include <sys/cmn_err.h>
     33 #include <sys/membar.h>
     34 #include <sys/disp.h>
     35 #include <sys/debug.h>
     36 #include <sys/privregs.h>
     37 #include <sys/xc_impl.h>
     38 #include <sys/ivintr.h>
     39 #include <sys/dmv.h>
     40 #include <sys/sysmacros.h>
     41 
#ifdef TRAPTRACE
/*
 * Per-cpu x-call counters, present only in TRAPTRACE builds.  The routines
 * in this file bump them through XC_STAT_INC(): x_dstat[] on the sending
 * side (xt_*(), xc_*(), xc_attention(), xc_dismissed()) and x_rstat[] in
 * the handlers xc_serv() and xc_loop().
 */
uint_t x_dstat[NCPU][XC_LOOP_EXIT+1];
uint_t x_rstat[NCPU][4];
#endif /* TRAPTRACE */

/* Both inums are obtained from add_softintr() in xc_init(). */
static uint64_t xc_serv_inum;	/* software interrupt number for xc_serv() */
static uint64_t xc_loop_inum;	/* software interrupt number for xc_loop() */
kmutex_t xc_sys_mutex;		/* protect xcall session and xc_mbox */
/*
 * Per-cpu flag; set to 1 by a cpu while it is posting to xc_mbox and
 * sending mondos, and cleared again afterwards.
 */
int xc_spl_enter[NCPU];		/* protect sending x-call */
/*
 * Id of the cpu that initiated xc_attention(), or -1 when there is none
 * (0 is a valid cpu id).  Set in xc_attention(), reset in xc_dismissed().
 */
static int xc_holder = -1; /* the cpu who initiates xc_attention, 0 is valid */
     52 
/*
 * Mail box for handshaking and xcall request; protected by xc_sys_mutex
 *
 * One entry per cpu, indexed by cpu id.  A sender fills in the target's
 * entry and the target's handler (xc_serv() or xc_loop()) consumes it.
 */
static struct xc_mbox {
	/* handler the target cpu is asked to call; may be NULL */
	xcfunc_t *xc_func;
	/* first and second argument handed to xc_func */
	uint64_t xc_arg1;
	uint64_t xc_arg2;
	/*
	 * In the initiator's own entry: the cpuset handed to
	 * xc_attention(); cleared again by xc_dismissed().
	 */
	cpuset_t xc_cpuset;
	/*
	 * Handshake state.  The values used in this file are XC_IDLE,
	 * XC_ENTER, XC_WAIT, XC_DOIT and XC_EXIT.
	 */
	volatile uint_t	xc_state;
} xc_mbox[NCPU];
     63 
/*
 * Tick limits.  xc_init() derives xc_tick_limit from sys_tick_freq and
 * XC_SEND_MONDO_MSEC, sets xc_tick_jump_limit to 1/32 of the unscaled
 * value, multiplies xc_tick_limit by xc_tick_limit_scale and copies the
 * result into xc_sync_tick_limit.
 */
uint64_t xc_tick_limit;		/* send_mondo() tick limit value */
uint64_t xc_tick_limit_scale = 1;	/* scale used to increase the limit */
uint64_t xc_tick_jump_limit;	/* send_mondo() irregular tick jump limit */
uint64_t xc_sync_tick_limit;	/* timeout limit for xt_sync() calls */

/*
 * timeout value for xcalls to be received by the target CPU
 * (used as a spin-loop iteration count in xc_attention(); set to the
 * highest cpunodes[].clock_freq times xc_scale)
 */
uint64_t xc_mondo_time_limit;

/*
 * timeout value for xcall functions to be executed on the target CPU
 * (used as a spin-loop iteration count; always xc_mondo_time_limit times
 * xc_mondo_multiplier)
 */
uint64_t xc_func_time_limit;

uint64_t xc_scale = 1;	/* scale used to calculate timeout limits */
uint64_t xc_mondo_multiplier = 10;

/*
 * While this is non-zero, the wait loops in this file do not give up on a
 * timeout: they call drv_usecwait(1), restart their counter and keep
 * waiting.  It is not written anywhere in this file; presumably it is set
 * by the send-mondo recovery code -- TODO confirm.
 */
uint_t sendmondo_in_recover;
     79 
/*
 * sending x-calls
 *
 * These routines are defined outside this file.  Every caller here first
 * loads the request with init_mondo() and then delivers it with either
 * send_one_mondo() (single target cpu id) or send_mondo_set() (a cpuset).
 */
void	init_mondo(xcfunc_t *func, uint64_t arg1, uint64_t arg2);
void	send_one_mondo(int cpuid);
void	send_mondo_set(cpuset_t set);
     86 
     87 /*
     88  * Adjust xc_attention timeout if a faster cpu is dynamically added.
     89  * Ignore the dynamic removal of a cpu that would lower these timeout
     90  * values.
     91  */
     92 static int
     93 xc_func_timeout_adj(cpu_setup_t what, int cpuid) {
     94 	uint64_t freq = cpunodes[cpuid].clock_freq;
     95 
     96 	switch (what) {
     97 	case CPU_ON:
     98 	case CPU_INIT:
     99 	case CPU_CONFIG:
    100 	case CPU_CPUPART_IN:
    101 		if (freq * xc_scale > xc_mondo_time_limit) {
    102 			xc_mondo_time_limit = freq * xc_scale;
    103 			xc_func_time_limit = xc_mondo_time_limit *
    104 			    xc_mondo_multiplier;
    105 		}
    106 		break;
    107 	case CPU_OFF:
    108 	case CPU_UNCONFIG:
    109 	case CPU_CPUPART_OUT:
    110 	default:
    111 		break;
    112 	}
    113 
    114 	return (0);
    115 }
    116 
    117 /*
    118  * xc_init - initialize x-call related locks
    119  */
    120 void
    121 xc_init(void)
    122 {
    123 	int pix;
    124 	uint64_t maxfreq = 0;
    125 
    126 	mutex_init(&xc_sys_mutex, NULL, MUTEX_SPIN,
    127 	    (void *)ipltospl(XCALL_PIL));
    128 
    129 #ifdef TRAPTRACE
    130 	/* Initialize for all possible CPUs. */
    131 	for (pix = 0; pix < NCPU; pix++) {
    132 		XC_STAT_INIT(pix);
    133 	}
    134 #endif /* TRAPTRACE */
    135 
    136 	xc_serv_inum = add_softintr(XCALL_PIL, (softintrfunc)xc_serv, 0,
    137 	    SOFTINT_MT);
    138 	xc_loop_inum = add_softintr(XCALL_PIL, (softintrfunc)xc_loop, 0,
    139 	    SOFTINT_MT);
    140 
    141 	/*
    142 	 * Initialize the calibrated tick limit for send_mondo.
    143 	 * The value represents the maximum tick count to wait.
    144 	 */
    145 	xc_tick_limit =
    146 	    ((uint64_t)sys_tick_freq * XC_SEND_MONDO_MSEC) / 1000;
    147 	xc_tick_jump_limit = xc_tick_limit / 32;
    148 	xc_tick_limit *= xc_tick_limit_scale;
    149 	xc_sync_tick_limit = xc_tick_limit;
    150 
    151 	/*
    152 	 * Maximum number of loops to wait before timing out in xc_attention.
    153 	 */
    154 	for (pix = 0; pix < NCPU; pix++) {
    155 		maxfreq = MAX(cpunodes[pix].clock_freq, maxfreq);
    156 	}
    157 	xc_mondo_time_limit = maxfreq * xc_scale;
    158 	register_cpu_setup_func((cpu_setup_func_t *)xc_func_timeout_adj, NULL);
    159 
    160 	/*
    161 	 * Maximum number of loops to wait for a xcall function to be
    162 	 * executed on the target CPU.
    163 	 */
    164 	xc_func_time_limit = xc_mondo_time_limit * xc_mondo_multiplier;
    165 }
    166 
    167 /*
    168  * The following routines basically provide callers with two kinds of
    169  * inter-processor interrupt services:
    170  *	1. cross calls (x-calls) - requests are handled at target cpu's TL=0
    171  *	2. cross traps (c-traps) - requests are handled at target cpu's TL>0
    172  *
    173  * Although these routines protect the services from migrating to other cpus
    174  * "after" they are called, it is the caller's choice or responsibility to
    175  * prevent the cpu migration "before" calling them.
    176  *
    177  * X-call routines:
    178  *
    179  *	xc_one()  - send a request to one processor
    180  *	xc_some() - send a request to some processors
    181  *	xc_all()  - send a request to all processors
    182  *
    183  *	Their common parameters:
    184  *		func - a TL=0 handler address
    185  *		arg1 and arg2  - optional
    186  *
    187  *	The services provided by x-call routines allow callers
    188  *	to send a request to target cpus to execute a TL=0
    189  *	handler.
    190  *	The interface of the registers of the TL=0 handler:
    191  *		%o0: arg1
    192  *		%o1: arg2
    193  *
    194  * X-trap routines:
    195  *
    196  *	xt_one()  - send a request to one processor
    197  *	xt_some() - send a request to some processors
    198  *	xt_all()  - send a request to all processors
    199  *
    200  *	Their common parameters:
    201  *		func - a TL>0 handler address or an interrupt number
    202  *		arg1, arg2
    203  *		       optional when "func" is an address;
    204  *		       0        when "func" is an interrupt number
    205  *
    206  *	If the request of "func" is a kernel address, then
    207  *	the target cpu will execute the request of "func" with
    208  *	args at "TL>0" level.
    209  *	The interface of the registers of the TL>0 handler:
    210  *		%g1: arg1
    211  *		%g2: arg2
    212  *
    213  *	If the request of "func" is not a kernel address, then it has
    214  *	to be an assigned interrupt number through add_softintr().
     215  *	An interrupt number is an index into the interrupt vector table,
     216  *	each entry of which contains an interrupt handler address with its
     217  *	corresponding interrupt level and argument.
    218  *	The target cpu will arrange the request to be serviced according
    219  *	to its pre-registered information.
    220  *	args are assumed to be zeros in this case.
    221  *
    222  * In addition, callers are allowed to capture and release cpus by
    223  * calling the routines: xc_attention() and xc_dismissed().
    224  */
    225 
    226 /*
    227  * spl_xcall - set PIL to xcall level
    228  */
    229 int
    230 spl_xcall(void)
    231 {
    232 	return (splr(XCALL_PIL));
    233 }
    234 
    235 /*
    236  * xt_one - send a "x-trap" to a cpu
    237  */
    238 void
    239 xt_one(int cix, xcfunc_t *func, uint64_t arg1, uint64_t arg2)
    240 {
    241 	if (!CPU_IN_SET(cpu_ready_set, cix)) {
    242 		return;
    243 	}
    244 	xt_one_unchecked(cix, func, arg1, arg2);
    245 }
    246 
/*
 * xt_one_unchecked - send a "x-trap" to a cpu without checking for its
 * existence in cpu_ready_set
 *
 *	cix  - id of the target cpu
 *	func - handler address; ASSERTed to be at or above KERNELBASE and
 *	       not to satisfy DMV_IS_DMV()
 *	arg1, arg2 - passed through to the handler
 *
 * If the target is the current cpu the request is delivered with
 * send_self_xcall(); otherwise it is loaded with init_mondo() and sent
 * with send_one_mondo().  The work is bracketed by XC_SPL_ENTER() and
 * XC_SPL_EXIT().
 */
void
xt_one_unchecked(int cix, xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{
	int lcx;
	int opl;
	cpuset_t tset;		/* holds just the target; used for tracing */

	/*
	 * Make sure the function address will not be interpreted as a
	 * dmv interrupt
	 */
	ASSERT(!DMV_IS_DMV(func));

	/*
	 * It's illegal to send software inums through the cross-trap
	 * interface.
	 */
	ASSERT((uintptr_t)func >= KERNELBASE);

	CPUSET_ZERO(tset);

	XC_SPL_ENTER(lcx, opl);			/* lcx set by the macro */

	CPUSET_ADD(tset, cix);

	if (cix == lcx) {
		/*
		 * same cpu - use software fast trap
		 */
		send_self_xcall(CPU, arg1, arg2, func);
		XC_STAT_INC(x_dstat[lcx][XT_ONE_SELF]);
		XC_TRACE(XT_ONE_SELF, &tset, func, arg1, arg2);
	} else {
		/*
		 * other cpu - send a mondo to the target cpu
		 */
		XC_TRACE(XT_ONE_OTHER, &tset, func, arg1, arg2);
		init_mondo(func, arg1, arg2);
		send_one_mondo(cix);
		XC_STAT_INC(x_dstat[lcx][XT_ONE_OTHER]);
	}
	XC_SPL_EXIT(lcx, opl);
}
    294 
/*
 * xt_some - send a "x-trap" to some cpus
 *
 *	cpuset - requested targets; only members that are also in
 *		 cpu_ready_set are signalled
 *	func   - handler address; ASSERTed to be at or above KERNELBASE
 *		 and not to satisfy DMV_IS_DMV()
 *	arg1, arg2 - passed through to the handler
 *
 * The current cpu, if it is among the targets, is served with
 * send_self_xcall(); the remaining targets are sent a mondo through
 * init_mondo()/send_mondo_set().  Returns without sending anything when
 * no requested cpu is ready.
 */
void
xt_some(cpuset_t cpuset, xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{
	int lcx;
	int opl;
	cpuset_t xc_cpuset, tset;

	/*
	 * Make sure the function address will not be interpreted as a
	 * dmv interrupt
	 */
	ASSERT(!DMV_IS_DMV(func));

	/*
	 * It's illegal to send software inums through the cross-trap
	 * interface.
	 */
	ASSERT((uintptr_t)func >= KERNELBASE);

	CPUSET_ZERO(tset);

	XC_SPL_ENTER(lcx, opl);		/* lcx set by the macro */

	/* tset holds only the current cpu; it is used for the SELF trace */
	CPUSET_ADD(tset, lcx);

	/*
	 * only send to the CPU_READY ones
	 */
	xc_cpuset = cpu_ready_set;
	CPUSET_AND(xc_cpuset, cpuset);

	/*
	 * send to nobody; just return
	 */
	if (CPUSET_ISNULL(xc_cpuset)) {
		XC_SPL_EXIT(lcx, opl);
		return;
	}

	/*
	 * don't send mondo to self
	 */
	if (CPU_IN_SET(xc_cpuset, lcx)) {
		/*
		 * same cpu - use software fast trap
		 */
		send_self_xcall(CPU, arg1, arg2, func);
		XC_STAT_INC(x_dstat[lcx][XT_SOME_SELF]);
		XC_TRACE(XT_SOME_SELF, &tset, func, arg1, arg2);
		CPUSET_DEL(xc_cpuset, lcx);
		/* the current cpu was the only target; nothing left to send */
		if (CPUSET_ISNULL(xc_cpuset)) {
			XC_SPL_EXIT(lcx, opl);
			return;
		}
	}
	XC_TRACE(XT_SOME_OTHER, &xc_cpuset, func, arg1, arg2);
	init_mondo(func, arg1, arg2);
	send_mondo_set(xc_cpuset);
	XC_STAT_INC(x_dstat[lcx][XT_SOME_OTHER]);

	XC_SPL_EXIT(lcx, opl);
}
    360 
/*
 * xt_all - send a "x-trap" to all cpus
 *
 *	func - handler address; ASSERTed to be at or above KERNELBASE and
 *	       not to satisfy DMV_IS_DMV()
 *	arg1, arg2 - passed through to the handler
 *
 * "All" means every member of cpu_ready_set.  The current cpu, if ready,
 * is served with send_self_xcall(); every other ready cpu is sent a mondo
 * through init_mondo()/send_mondo_set().
 */
void
xt_all(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{
	int lcx;
	int opl;
	cpuset_t xc_cpuset, tset;

	/*
	 * Make sure the function address will not be interpreted as a
	 * dmv interrupt
	 */
	ASSERT(!DMV_IS_DMV(func));

	/*
	 * It's illegal to send software inums through the cross-trap
	 * interface.
	 */
	ASSERT((uintptr_t)func >= KERNELBASE);

	CPUSET_ZERO(tset);

	XC_SPL_ENTER(lcx, opl);		/* lcx set by the macro */

	/* tset holds only the current cpu; it is used for the SELF trace */
	CPUSET_ADD(tset, lcx);

	/*
	 * same cpu - use software fast trap
	 */
	if (CPU_IN_SET(cpu_ready_set, lcx))
		send_self_xcall(CPU, arg1, arg2, func);

	XC_TRACE(XT_ALL_OTHER, &cpu_ready_set, func, arg1, arg2);

	/*
	 * don't send mondo to self
	 */
	xc_cpuset = cpu_ready_set;
	CPUSET_DEL(xc_cpuset, lcx);

	/* no other ready cpu: record this as a self-only request and leave */
	if (CPUSET_ISNULL(xc_cpuset)) {
		XC_STAT_INC(x_dstat[lcx][XT_ALL_SELF]);
		XC_TRACE(XT_ALL_SELF, &tset, func, arg1, arg2);
		XC_SPL_EXIT(lcx, opl);
		return;
	}

	init_mondo(func, arg1, arg2);
	send_mondo_set(xc_cpuset);

	XC_STAT_INC(x_dstat[lcx][XT_ALL_OTHER]);
	XC_SPL_EXIT(lcx, opl);
}
    416 
/*
 * xc_one - send a "x-call" to a cpu
 *
 *	cix  - id of the target cpu; the call is a no-op if it is not in
 *	       cpu_ready_set
 *	func - handler address; ASSERTed to be above KERNELBASE and
 *	       PC_ALIGN aligned
 *	arg1, arg2 - passed to func
 *
 * Three cases, all with kernel preemption disabled:
 *   - target is the current cpu: func is called directly;
 *   - the current cpu is xc_holder (it already owns xc_sys_mutex and the
 *     target sits in xc_loop()): the request is posted in the target's
 *     mailbox and we spin until its state returns to XC_WAIT;
 *   - otherwise: xc_sys_mutex is taken, the request is posted, a mondo
 *     raising xc_serv_inum is sent, and we spin until the target's state
 *     returns to XC_IDLE.
 *
 * The function does not return until the request has been served; a
 * timeout in either spin loop ends in a panic unless sendmondo_in_recover
 * is set.
 */
void
xc_one(int cix, xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{
	int lcx;
	int opl;
	uint64_t loop_cnt = 0;
	cpuset_t tset;
	int first_time = 1;	/* cleared after the one XT_SYNC_ONE() retry */

	/*
	 * send to nobody; just return
	 */
	if (!CPU_IN_SET(cpu_ready_set, cix))
		return;

	ASSERT((uintptr_t)func > KERNELBASE);
	ASSERT(((uintptr_t)func % PC_ALIGN) == 0);

	CPUSET_ZERO(tset);

	kpreempt_disable();

	XC_SPL_ENTER(lcx, opl);		/* lcx set by the macro */

	/* tset holds only the target; it is used for tracing */
	CPUSET_ADD(tset, cix);

	if (cix == lcx) {	/* same cpu just do it */
		XC_TRACE(XC_ONE_SELF, &tset, func, arg1, arg2);
		(*func)(arg1, arg2);
		XC_STAT_INC(x_dstat[lcx][XC_ONE_SELF]);
		XC_SPL_EXIT(lcx, opl);
		kpreempt_enable();
		return;
	}

	if (xc_holder == lcx) {		/* got the xc_sys_mutex already */
		ASSERT(MUTEX_HELD(&xc_sys_mutex));
		ASSERT(CPU_IN_SET(xc_mbox[lcx].xc_cpuset, lcx));
		ASSERT(CPU_IN_SET(xc_mbox[cix].xc_cpuset, cix));
		ASSERT(xc_mbox[cix].xc_state == XC_WAIT);
		XC_TRACE(XC_ONE_OTHER_H, &tset, func, arg1, arg2);

		/*
		 * target processor's xc_loop should be waiting
		 * for the work to do; just set up the xc_mbox
		 */
		XC_SETUP(cix, func, arg1, arg2);
		membar_stld();

		/*
		 * xc_loop() puts the state back to XC_WAIT once it has
		 * run the request.
		 */
		while (xc_mbox[cix].xc_state != XC_WAIT) {
			if (loop_cnt++ > xc_func_time_limit) {
				if (sendmondo_in_recover) {
					drv_usecwait(1);
					loop_cnt = 0;
					continue;
				}
				cmn_err(CE_PANIC, "xc_one() timeout, "
				    "xc_state[%d] != XC_WAIT", cix);
			}
		}
		XC_STAT_INC(x_dstat[lcx][XC_ONE_OTHER_H]);
		XC_SPL_EXIT(lcx, opl);
		kpreempt_enable();
		return;
	}

	/*
	 * Avoid dead lock if someone has sent us a xc_loop request while
	 * we are trying to grab xc_sys_mutex.
	 */
	XC_SPL_EXIT(lcx, opl);

	/*
	 * At this point, since we don't own xc_sys_mutex,
	 * our pil shouldn't run at or above the XCALL_PIL.
	 */
	ASSERT(getpil() < XCALL_PIL);

	/*
	 * Since xc_holder is not owned by us, it could be that
	 * no one owns it, or we are not informed to enter into
	 * xc_loop(). In either case, we need to grab the
	 * xc_sys_mutex before we write to the xc_mbox, and
	 * we shouldn't release it until the request is finished.
	 */

	mutex_enter(&xc_sys_mutex);
	xc_spl_enter[lcx] = 1;

	/*
	 * Since we own xc_sys_mutex now, we are safe to
	 * write to the xc_mbox.
	 */
	ASSERT(xc_mbox[cix].xc_state == XC_IDLE);
	XC_TRACE(XC_ONE_OTHER, &tset, func, arg1, arg2);
	XC_SETUP(cix, func, arg1, arg2);
	init_mondo(setsoftint_tl1, xc_serv_inum, 0);
	send_one_mondo(cix);
	xc_spl_enter[lcx] = 0;

	/* xc_serv does membar_stld */
	while (xc_mbox[cix].xc_state != XC_IDLE) {
		if (loop_cnt++ > xc_func_time_limit) {
			if (sendmondo_in_recover) {
				drv_usecwait(1);
				loop_cnt = 0;
				continue;
			}
			/*
			 * On the first timeout, issue XT_SYNC_ONE() to the
			 * target and give it one more full timeout period
			 * before panicking.
			 */
			if (first_time) {
				XT_SYNC_ONE(cix);
				first_time = 0;
				loop_cnt = 0;
				continue;
			}
			cmn_err(CE_PANIC, "xc_one() timeout, "
			    "xc_state[%d] != XC_IDLE", cix);
		}
	}
	XC_STAT_INC(x_dstat[lcx][XC_ONE_OTHER]);
	mutex_exit(&xc_sys_mutex);

	kpreempt_enable();
}
    543 
/*
 * xc_some - send a "x-call" to some cpus
 *
 *	cpuset - requested targets; only members that are also in
 *		 cpu_ready_set are served
 *	func   - handler address; ASSERTed to be above KERNELBASE and
 *		 PC_ALIGN aligned
 *	arg1, arg2 - passed to func
 *
 * No mondo is sent to the current cpu: if it is among the targets, func
 * is simply called on it directly.  The remaining targets are reached
 * either through their mailboxes alone (when the current cpu is
 * xc_holder, so they are already in xc_loop()), or by taking
 * xc_sys_mutex, posting the request and sending a mondo that raises
 * xc_serv_inum.  In both cases WAIT_MBOX_DONE() is invoked before
 * returning.
 */
void
xc_some(cpuset_t cpuset, xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{
	int lcx;
	int opl;
	cpuset_t xc_cpuset, tset;

	ASSERT((uintptr_t)func > KERNELBASE);
	ASSERT(((uintptr_t)func % PC_ALIGN) == 0);

	CPUSET_ZERO(tset);

	kpreempt_disable();
	XC_SPL_ENTER(lcx, opl);			/* lcx set by the macro */

	/* tset holds only the current cpu; it is used for the SELF trace */
	CPUSET_ADD(tset, lcx);

	/*
	 * only send to the CPU_READY ones
	 */
	xc_cpuset = cpu_ready_set;
	CPUSET_AND(xc_cpuset, cpuset);

	/*
	 * send to nobody; just return
	 */
	if (CPUSET_ISNULL(xc_cpuset)) {
		XC_SPL_EXIT(lcx, opl);
		kpreempt_enable();
		return;
	}

	if (CPU_IN_SET(xc_cpuset, lcx)) {
		/*
		 * same cpu just do it
		 */
		(*func)(arg1, arg2);
		CPUSET_DEL(xc_cpuset, lcx);
		/* the current cpu was the only target; we are done */
		if (CPUSET_ISNULL(xc_cpuset)) {
			XC_STAT_INC(x_dstat[lcx][XC_SOME_SELF]);
			XC_TRACE(XC_SOME_SELF, &tset, func, arg1, arg2);
			XC_SPL_EXIT(lcx, opl);
			kpreempt_enable();
			return;
		}
	}

	if (xc_holder == lcx) {		/* got the xc_sys_mutex already */
		cpuset_t mset = xc_mbox[lcx].xc_cpuset;

		/*
		 * The requested set must lie entirely within the set that
		 * was captured by xc_attention().
		 */
		CPUSET_AND(mset, cpuset);
		ASSERT(MUTEX_HELD(&xc_sys_mutex));
		ASSERT(CPUSET_ISEQUAL(mset, cpuset));
		SEND_MBOX_ONLY(xc_cpuset, func, arg1, arg2, lcx, XC_WAIT);
		WAIT_MBOX_DONE(xc_cpuset, lcx, XC_WAIT, 0);
		XC_STAT_INC(x_dstat[lcx][XC_SOME_OTHER_H]);
		XC_TRACE(XC_SOME_OTHER_H, &xc_cpuset, func, arg1, arg2);
		XC_SPL_EXIT(lcx, opl);
		kpreempt_enable();
		return;
	}

	/*
	 * Avoid dead lock if someone has sent us a xc_loop request while
	 * we are trying to grab xc_sys_mutex.
	 */
	XC_SPL_EXIT(lcx, opl);

	/*
	 * At this point, since we don't own xc_sys_mutex,
	 * our pil shouldn't run at or above the XCALL_PIL.
	 */
	ASSERT(getpil() < XCALL_PIL);

	/*
	 * grab xc_sys_mutex before writing to the xc_mbox
	 */
	mutex_enter(&xc_sys_mutex);
	xc_spl_enter[lcx] = 1;

	XC_TRACE(XC_SOME_OTHER, &xc_cpuset, func, arg1, arg2);
	init_mondo(setsoftint_tl1, xc_serv_inum, 0);
	SEND_MBOX_MONDO(xc_cpuset, func, arg1, arg2, XC_IDLE);
	WAIT_MBOX_DONE(xc_cpuset, lcx, XC_IDLE, 1);

	xc_spl_enter[lcx] = 0;
	XC_STAT_INC(x_dstat[lcx][XC_SOME_OTHER]);
	mutex_exit(&xc_sys_mutex);
	kpreempt_enable();
}
    637 
/*
 * xc_all - send a "x-call" to all cpus
 *
 *	func - handler address; ASSERTed to be above KERNELBASE and
 *	       PC_ALIGN aligned
 *	arg1, arg2 - passed to func
 *
 * func is first called directly on the current cpu (unconditionally),
 * then requested on every other member of cpu_ready_set.  As in
 * xc_some(), the other cpus are reached through their mailboxes alone
 * when the current cpu is xc_holder, and otherwise by taking
 * xc_sys_mutex and sending a mondo that raises xc_serv_inum.
 */
void
xc_all(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{
	int lcx;
	int opl;
	cpuset_t xc_cpuset, tset;

	ASSERT((uintptr_t)func > KERNELBASE);
	ASSERT(((uintptr_t)func % PC_ALIGN) == 0);

	CPUSET_ZERO(tset);

	kpreempt_disable();
	XC_SPL_ENTER(lcx, opl);			/* lcx set by the macro */

	/* tset holds only the current cpu; it is used for the SELF trace */
	CPUSET_ADD(tset, lcx);

	/*
	 * same cpu just do it
	 */
	(*func)(arg1, arg2);
	xc_cpuset = cpu_ready_set;
	CPUSET_DEL(xc_cpuset, lcx);

	/* no other ready cpu: nothing more to do */
	if (CPUSET_ISNULL(xc_cpuset)) {
		XC_STAT_INC(x_dstat[lcx][XC_ALL_SELF]);
		XC_TRACE(XC_ALL_SELF, &tset, func, arg1, arg2);
		XC_SPL_EXIT(lcx, opl);
		kpreempt_enable();
		return;
	}

	if (xc_holder == lcx) {		/* got the xc_sys_mutex already */
		cpuset_t mset = xc_mbox[lcx].xc_cpuset;

		/*
		 * Every other ready cpu must be part of the set that was
		 * captured by xc_attention().
		 */
		CPUSET_AND(mset, xc_cpuset);
		ASSERT(MUTEX_HELD(&xc_sys_mutex));
		ASSERT(CPUSET_ISEQUAL(mset, xc_cpuset));
		XC_TRACE(XC_ALL_OTHER_H, &xc_cpuset, func, arg1, arg2);
		SEND_MBOX_ONLY(xc_cpuset, func, arg1, arg2, lcx, XC_WAIT);
		WAIT_MBOX_DONE(xc_cpuset, lcx, XC_WAIT, 0);
		XC_STAT_INC(x_dstat[lcx][XC_ALL_OTHER_H]);
		XC_SPL_EXIT(lcx, opl);
		kpreempt_enable();
		return;
	}

	/*
	 * Avoid dead lock if someone has sent us a xc_loop request while
	 * we are trying to grab xc_sys_mutex.
	 */
	XC_SPL_EXIT(lcx, opl);

	/*
	 * At this point, since we don't own xc_sys_mutex,
	 * our pil shouldn't run at or above the XCALL_PIL.
	 */
	ASSERT(getpil() < XCALL_PIL);

	/*
	 * grab xc_sys_mutex before writing to the xc_mbox
	 */
	mutex_enter(&xc_sys_mutex);
	xc_spl_enter[lcx] = 1;

	XC_TRACE(XC_ALL_OTHER, &xc_cpuset, func, arg1, arg2);
	init_mondo(setsoftint_tl1, xc_serv_inum, 0);
	SEND_MBOX_MONDO(xc_cpuset, func, arg1, arg2, XC_IDLE);
	WAIT_MBOX_DONE(xc_cpuset, lcx, XC_IDLE, 1);

	xc_spl_enter[lcx] = 0;
	XC_STAT_INC(x_dstat[lcx][XC_ALL_OTHER]);
	mutex_exit(&xc_sys_mutex);
	kpreempt_enable();
}
    716 
/*
 * xc_attention - paired with xc_dismissed()
 *
 * xc_attention() holds the xc_sys_mutex and xc_dismissed() releases it
 * called when an initiator wants to capture some/all cpus for a critical
 * session.
 *
 *	cpuset - cpus to capture; only members that are also in
 *		 cpu_ready_set (other than the caller) are signalled
 *
 * The caller becomes xc_holder and its mailbox records cpuset.  Every
 * other ready target is sent a mondo raising xc_loop_inum, and the
 * function spins until each of them reports XC_WAIT.
 *
 * NOTE: this function returns with xc_sys_mutex still held on every
 * path, including the early return taken when there is no other cpu to
 * capture.  The caller must call xc_dismissed() to release it.
 */
void
xc_attention(cpuset_t cpuset)
{
	int pix, lcx;
	cpuset_t xc_cpuset, tmpset;
	cpuset_t recv_cpuset;	/* targets seen in XC_WAIT so far */
	uint64_t loop_cnt = 0;
	int first_time = 1;	/* cleared after the one XT_SYNC_SOME() retry */

	CPUSET_ZERO(recv_cpuset);

	/*
	 * don't migrate the cpu until xc_dismissed() is finished
	 */
	ASSERT(getpil() < XCALL_PIL);
	mutex_enter(&xc_sys_mutex);
	lcx = (int)(CPU->cpu_id);
	ASSERT(x_dstat[lcx][XC_ATTENTION] ==
	    x_dstat[lcx][XC_DISMISSED]);
	ASSERT(xc_holder == -1);
	xc_mbox[lcx].xc_cpuset = cpuset;
	xc_holder = lcx; /* no membar; only current cpu needs the right lcx */

	/*
	 * only send to the CPU_READY ones
	 */
	xc_cpuset = cpu_ready_set;
	CPUSET_AND(xc_cpuset, cpuset);

	/*
	 * don't send mondo to self
	 */
	CPUSET_DEL(xc_cpuset, lcx);

	XC_STAT_INC(x_dstat[lcx][XC_ATTENTION]);
	XC_TRACE(XC_ATTENTION, &xc_cpuset, NULL, NULL, NULL);

	/* nobody else to capture; xc_sys_mutex intentionally stays held */
	if (CPUSET_ISNULL(xc_cpuset))
		return;

	xc_spl_enter[lcx] = 1;
	/*
	 * inform the target processors to enter into xc_loop()
	 */
	init_mondo(setsoftint_tl1, xc_loop_inum, 0);
	SEND_MBOX_MONDO_XC_ENTER(xc_cpuset);
	xc_spl_enter[lcx] = 0;

	/*
	 * make sure target processors have entered into xc_loop()
	 */
	while (!CPUSET_ISEQUAL(recv_cpuset, xc_cpuset)) {
		/*
		 * One polling pass over all targets; tmpset is a scratch
		 * copy that lets the scan stop after the last target.
		 */
		tmpset = xc_cpuset;
		for (pix = 0; pix < NCPU; pix++) {
			if (CPU_IN_SET(tmpset, pix)) {
				/*
				 * membar_stld() is done in xc_loop
				 */
				if (xc_mbox[pix].xc_state == XC_WAIT) {
					CPUSET_ADD(recv_cpuset, pix);
				}
				CPUSET_DEL(tmpset, pix);
				if (CPUSET_ISNULL(tmpset)) {
					break;
				}
			}
		}
		if (loop_cnt++ > xc_mondo_time_limit) {
			if (sendmondo_in_recover) {
				drv_usecwait(1);
				loop_cnt = 0;
				continue;
			}
			/*
			 * On the first timeout, issue XT_SYNC_SOME() to the
			 * targets and allow one more full timeout period
			 * before panicking.
			 */
			if (first_time) {
				XT_SYNC_SOME(xc_cpuset);
				first_time = 0;
				loop_cnt = 0;
				continue;
			}
			cmn_err(CE_PANIC, "xc_attention() timeout");
		}
	}

	/*
	 * xc_sys_mutex remains held until xc_dismissed() is finished
	 */
}
    811 
/*
 * xc_dismissed - paired with xc_attention()
 *
 * Called after the critical session is finished.
 *
 *	cpuset - must equal the set that was passed to xc_attention()
 *		 (ASSERTed against the caller's mailbox)
 *
 * Must be called on the cpu that is xc_holder.  Tells every captured
 * ready cpu to leave xc_loop() by setting its mailbox state to XC_EXIT,
 * spins until each of them reports XC_IDLE, then resets xc_holder to -1
 * and releases xc_sys_mutex.
 */
void
xc_dismissed(cpuset_t cpuset)
{
	int pix;
	int lcx = (int)(CPU->cpu_id);
	cpuset_t xc_cpuset, tmpset;
	cpuset_t recv_cpuset;	/* targets seen in XC_IDLE so far */
	uint64_t loop_cnt = 0;

	ASSERT(lcx == xc_holder);
	ASSERT(CPUSET_ISEQUAL(xc_mbox[lcx].xc_cpuset, cpuset));
	ASSERT(getpil() >= XCALL_PIL);
	CPUSET_ZERO(xc_mbox[lcx].xc_cpuset);
	CPUSET_ZERO(recv_cpuset);
	membar_stld();

	XC_STAT_INC(x_dstat[lcx][XC_DISMISSED]);
	ASSERT(x_dstat[lcx][XC_DISMISSED] == x_dstat[lcx][XC_ATTENTION]);

	/*
	 * only send to the CPU_READY ones
	 */
	xc_cpuset = cpu_ready_set;
	CPUSET_AND(xc_cpuset, cpuset);

	/*
	 * exclude itself
	 */
	CPUSET_DEL(xc_cpuset, lcx);
	XC_TRACE(XC_DISMISSED, &xc_cpuset, NULL, NULL, NULL);
	/* no other cpu was captured; just end the session */
	if (CPUSET_ISNULL(xc_cpuset)) {
		xc_holder = -1;
		mutex_exit(&xc_sys_mutex);
		return;
	}

	/*
	 * inform other processors to get out of xc_loop()
	 */
	tmpset = xc_cpuset;
	for (pix = 0; pix < NCPU; pix++) {
		if (CPU_IN_SET(tmpset, pix)) {
			xc_mbox[pix].xc_state = XC_EXIT;
			membar_stld();
			CPUSET_DEL(tmpset, pix);
			if (CPUSET_ISNULL(tmpset)) {
				break;
			}
		}
	}

	/*
	 * make sure target processors have exited from xc_loop()
	 */
	while (!CPUSET_ISEQUAL(recv_cpuset, xc_cpuset)) {
		/*
		 * One polling pass over all targets; tmpset is a scratch
		 * copy that lets the scan stop after the last target.
		 */
		tmpset = xc_cpuset;
		for (pix = 0; pix < NCPU; pix++) {
			if (CPU_IN_SET(tmpset, pix)) {
				/*
				 * membar_stld() is done in xc_loop
				 */
				if (xc_mbox[pix].xc_state == XC_IDLE) {
					CPUSET_ADD(recv_cpuset, pix);
				}
				CPUSET_DEL(tmpset, pix);
				if (CPUSET_ISNULL(tmpset)) {
					break;
				}
			}
		}
		/* unlike xc_attention(), there is no sync-and-retry step here */
		if (loop_cnt++ > xc_func_time_limit) {
				if (sendmondo_in_recover) {
					drv_usecwait(1);
					loop_cnt = 0;
					continue;
				}
			cmn_err(CE_PANIC, "xc_dismissed() timeout");
		}
	}
	xc_holder = -1;
	mutex_exit(&xc_sys_mutex);
}
    899 
    900 /*
    901  * xc_serv - "x-call" handler at TL=0; serves only one x-call request
    902  * runs at XCALL_PIL level.
    903  */
    904 uint_t
    905 xc_serv(void)
    906 {
    907 	int lcx = (int)(CPU->cpu_id);
    908 	struct xc_mbox *xmp;
    909 	xcfunc_t *func;
    910 	uint64_t arg1, arg2;
    911 	cpuset_t tset;
    912 
    913 	ASSERT(getpil() == XCALL_PIL);
    914 	CPUSET_ZERO(tset);
    915 	CPUSET_ADD(tset, lcx);
    916 	flush_windows();
    917 	xmp = &xc_mbox[lcx];
    918 	ASSERT(lcx != xc_holder);
    919 	ASSERT(xmp->xc_state == XC_DOIT);
    920 	func = xmp->xc_func;
    921 	XC_TRACE(XC_SERV, &tset, func, xmp->xc_arg1, xmp->xc_arg2);
    922 	if (func != NULL) {
    923 		arg1 = xmp->xc_arg1;
    924 		arg2 = xmp->xc_arg2;
    925 		(*func)(arg1, arg2);
    926 	}
    927 	XC_STAT_INC(x_rstat[lcx][XC_SERV]);
    928 	XC_TRACE(XC_SERV, &tset, func, arg1, arg2);
    929 	xmp->xc_state = XC_IDLE;
    930 	membar_stld();
    931 	return (1);
    932 }
    933 
/*
 * if non-zero, an xc_loop timeout will cause a panic
 * otherwise print a warning
 */
uint_t xc_loop_panic = 0;
    939 
/*
 * xc_loop - "x-call" handler at TL=0; capture the cpu for a critical
 * session, or serve multiple x-call requests runs at XCALL_PIL level.
 *
 * Mailbox handshake performed on the current cpu's xc_mbox entry:
 *	XC_ENTER (on entry) -> XC_WAIT	announce that we are captured
 *	XC_DOIT  -> XC_WAIT		each time a request has been run
 *	XC_EXIT  -> XC_IDLE		on the way out
 * The function spins until the state becomes XC_EXIT.
 *
 * Always returns 1.
 */
uint_t
xc_loop(void)
{
	int lcx = (int)(CPU->cpu_id);
	struct xc_mbox *xmp;
	xcfunc_t *func;
	uint64_t arg1, arg2;
	uint64_t loop_cnt = 0;	/* idle iterations since the last request */
	cpuset_t tset;		/* holds just this cpu; used for tracing */

	ASSERT(getpil() == XCALL_PIL);

	CPUSET_ZERO(tset);
	flush_windows();

	/*
	 * Some one must have owned the xc_sys_mutex;
	 * no further interrupt (at XCALL_PIL or below) can
	 * be taken by this processor until xc_loop exits.
	 *
	 * The owner of xc_sys_mutex (or xc_holder) can expect
	 * its xc/xt requests are handled as follows:
	 *	xc requests use xc_mbox's handshaking for their services
	 *	xt requests at TL>0 will be handled immediately
	 *	xt requests at TL=0:
	 *		if their handlers'pils are <= XCALL_PIL, then
	 *			they will be handled after xc_loop exits
	 *			(so, they probably should not be used)
	 *		else they will be handled immediately
	 *
	 * For those who are not informed to enter xc_loop, if they
	 * send xc/xt requests to this processor at this moment,
	 * the requests will be handled as follows:
	 *	xc requests will be handled after they grab xc_sys_mutex
	 *	xt requests at TL>0 will be handled immediately
	 *	xt requests at TL=0:
	 *		if their handlers'pils are <= XCALL_PIL, then
	 *			they will be handled after xc_loop exits
	 *		else they will be handled immediately
	 */
	xmp = &xc_mbox[lcx];
	ASSERT(lcx != xc_holder);
	ASSERT(xmp->xc_state == XC_ENTER);
	/* tell the initiator (polling in xc_attention()) that we are in */
	xmp->xc_state = XC_WAIT;
	CPUSET_ADD(tset, lcx);
	membar_stld();
	XC_STAT_INC(x_rstat[lcx][XC_LOOP]);
	XC_TRACE(XC_LOOP_ENTER, &tset, NULL, NULL, NULL);
	while (xmp->xc_state != XC_EXIT) {
		if (xmp->xc_state == XC_DOIT) {
			func = xmp->xc_func;
			arg1 = xmp->xc_arg1;
			arg2 = xmp->xc_arg2;
			XC_TRACE(XC_LOOP_DOIT, &tset, func, arg1, arg2);
			if (func != NULL)
				(*func)(arg1, arg2);
			/* request served; the sender is polling for XC_WAIT */
			xmp->xc_state = XC_WAIT;
			membar_stld();
			/*
			 * reset the timeout counter
			 * since some work was done
			 */
			loop_cnt = 0;
		} else {
			/* patience is a virtue... */
			loop_cnt++;
		}

		if (loop_cnt > xc_func_time_limit) {
			if (sendmondo_in_recover) {
				drv_usecwait(1);
				loop_cnt = 0;
				continue;
			}
			cmn_err(xc_loop_panic ? CE_PANIC : CE_WARN,
			    "xc_loop() timeout");
			/*
			 * if the above displayed a warning,
			 * reset the timeout counter and be patient
			 */
			loop_cnt = 0;
		}
	}
	ASSERT(xmp->xc_state == XC_EXIT);
	ASSERT(xc_holder != -1);
	XC_TRACE(XC_LOOP_EXIT, &tset, NULL, NULL, NULL);
	/* acknowledge the dismissal; xc_dismissed() is polling for XC_IDLE */
	xmp->xc_state = XC_IDLE;
	membar_stld();
	return (1);
}
   1034