Home | History | Annotate | Download | only in pcplusmp
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
     29  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
     30  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
     31  * PSMI 1.5 extensions are supported in Solaris Nevada.
     32  * PSMI 1.6 extensions are supported in Solaris Nevada.
     33  */
     34 #define	PSMI_1_6
     35 
     36 #include <sys/processor.h>
     37 #include <sys/time.h>
     38 #include <sys/psm.h>
     39 #include <sys/smp_impldefs.h>
     40 #include <sys/cram.h>
     41 #include <sys/acpi/acpi.h>
     42 #include <sys/acpica.h>
     43 #include <sys/psm_common.h>
     44 #include <sys/apic.h>
     45 #include <sys/pit.h>
     46 #include <sys/ddi.h>
     47 #include <sys/sunddi.h>
     48 #include <sys/ddi_impldefs.h>
     49 #include <sys/pci.h>
     50 #include <sys/promif.h>
     51 #include <sys/x86_archext.h>
     52 #include <sys/cpc_impl.h>
     53 #include <sys/uadmin.h>
     54 #include <sys/panic.h>
     55 #include <sys/debug.h>
     56 #include <sys/archsystm.h>
     57 #include <sys/trap.h>
     58 #include <sys/machsystm.h>
     59 #include <sys/sysmacros.h>
     60 #include <sys/cpuvar.h>
     61 #include <sys/rm_platter.h>
     62 #include <sys/privregs.h>
     63 #include <sys/note.h>
     64 #include <sys/pci_intr_lib.h>
     65 #include <sys/spl.h>
     66 #include <sys/clock.h>
     67 #include <sys/dditypes.h>
     68 #include <sys/sunddi.h>
     69 #include <sys/x_call.h>
     70 #include <sys/reboot.h>
     71 #include <sys/hpet.h>
     72 
     73 /*
     74  *	Local Function Prototypes
     75  */
     76 static void apic_init_intr();
     77 static void apic_nmi_intr(caddr_t arg, struct regs *rp);
     78 
     79 /*
     80  *	standard MP entries
     81  */
     82 static int	apic_probe();
     83 static int	apic_clkinit();
     84 static int	apic_getclkirq(int ipl);
     85 static uint_t	apic_calibrate(volatile uint32_t *addr,
     86     uint16_t *pit_ticks_adj);
     87 static hrtime_t apic_gettime();
     88 static hrtime_t apic_gethrtime();
     89 static void	apic_init();
     90 static void	apic_picinit(void);
     91 static int	apic_cpu_start(processorid_t, caddr_t);
     92 static int	apic_post_cpu_start(void);
     93 static void	apic_send_ipi(int cpun, int ipl);
     94 static void	apic_set_idlecpu(processorid_t cpun);
     95 static void	apic_unset_idlecpu(processorid_t cpun);
     96 static int	apic_intr_enter(int ipl, int *vect);
     97 static void	apic_setspl(int ipl);
     98 static void	x2apic_setspl(int ipl);
     99 static int	apic_addspl(int ipl, int vector, int min_ipl, int max_ipl);
    100 static int	apic_delspl(int ipl, int vector, int min_ipl, int max_ipl);
    101 static void	apic_shutdown(int cmd, int fcn);
    102 static void	apic_preshutdown(int cmd, int fcn);
    103 static int	apic_disable_intr(processorid_t cpun);
    104 static void	apic_enable_intr(processorid_t cpun);
    105 static processorid_t	apic_get_next_processorid(processorid_t cpun);
    106 static int		apic_get_ipivect(int ipl, int type);
    107 static void	apic_timer_reprogram(hrtime_t time);
    108 static void	apic_timer_enable(void);
    109 static void	apic_timer_disable(void);
    110 static void	apic_post_cyclic_setup(void *arg);
    111 static void	apic_intrr_init(int apic_mode);
    112 static void	apic_record_ioapic_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt);
    113 static void	apic_record_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs);
    114 
    115 static int	apic_oneshot = 0;
    116 int	apic_oneshot_enable = 1; /* to allow disabling one-shot capability */
    117 
    118 /* Now the ones for Dynamic Interrupt distribution */
    119 int	apic_enable_dynamic_migration = 0;
    120 
    121 extern int apic_have_32bit_cr8;
    122 
    123 /*
    124  * These variables are frequently accessed in apic_intr_enter(),
    125  * apic_intr_exit and apic_setspl, so group them together
    126  */
    127 volatile uint32_t *apicadr =  NULL;	/* virtual addr of local APIC	*/
    128 int apic_setspl_delay = 1;		/* apic_setspl - delay enable	*/
    129 int apic_clkvect;
    130 
    131 /* vector at which error interrupts come in */
    132 int apic_errvect;
    133 int apic_enable_error_intr = 1;
    134 int apic_error_display_delay = 100;
    135 
    136 /* vector at which performance counter overflow interrupts come in */
    137 int apic_cpcovf_vect;
    138 int apic_enable_cpcovf_intr = 1;
    139 
    140 /* vector at which CMCI interrupts come in */
    141 int apic_cmci_vect;
    142 extern int cmi_enable_cmci;
    143 extern void cmi_cmci_trap(void);
    144 
    145 static kmutex_t cmci_cpu_setup_lock;	/* protects cmci_cpu_setup_registered */
    146 static int cmci_cpu_setup_registered;
    147 
    148 /*
    149  * The following vector assignments influence the value of ipltopri and
    150  * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program
    151  * idle to 0 and IPL 0 to 0xf to differentiate idle in case
    152  * we care to do so in future. Note some IPLs which are rarely used
    153  * will share the vector ranges and heavily used IPLs (5 and 6) have
    154  * a wide range.
    155  *
    156  * This array is used to initialize apic_ipls[] (in apic_init()).
    157  *
    158  *	IPL		Vector range.		as passed to intr_enter
    159  *	0		none.
    160  *	1,2,3		0x20-0x2f		0x0-0xf
    161  *	4		0x30-0x3f		0x10-0x1f
    162  *	5		0x40-0x5f		0x20-0x3f
    163  *	6		0x60-0x7f		0x40-0x5f
    164  *	7,8,9		0x80-0x8f		0x60-0x6f
    165  *	10		0x90-0x9f		0x70-0x7f
    166  *	11		0xa0-0xaf		0x80-0x8f
    167  *	...		...
    168  *	15		0xe0-0xef		0xc0-0xcf
    169  *	15		0xf0-0xff		0xd0-0xdf
    170  */
    171 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
    172 	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15
    173 };
    174 	/*
    175 	 * The ipl of an ISR at vector X is apic_vectortoipl[X>>4]
    176 	 * NOTE that this is vector as passed into intr_enter which is
    177 	 * programmed vector - 0x20 (APIC_BASE_VECT)
    178 	 */
    179 
    180 uchar_t	apic_ipltopri[MAXIPL + 1];	/* unix ipl to apic pri	*/
    181 	/* The taskpri to be programmed into apic to mask given ipl */
    182 
    183 #if defined(__amd64)
    184 uchar_t	apic_cr8pri[MAXIPL + 1];	/* unix ipl to cr8 pri	*/
    185 #endif
    186 
    187 /*
    188  * Correlation of the hardware vector to the IPL in use, initialized
    189  * from apic_vectortoipl[] in apic_init().  The final IPLs may not correlate
    190  * to the IPLs in apic_vectortoipl on some systems that share interrupt lines
    191  * connected to errata-stricken IOAPICs
    192  */
    193 uchar_t apic_ipls[APIC_AVAIL_VECTOR];
    194 
    195 /*
    196  * Patchable global variables.
    197  */
    198 int	apic_forceload = 0;
    199 
    200 int	apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */
    201 					/* 1 - use gettime() for performance */
    202 int	apic_flat_model = 0;		/* 0 - clustered. 1 - flat */
    203 int	apic_enable_hwsoftint = 0;	/* 0 - disable, 1 - enable	*/
    204 int	apic_enable_bind_log = 1;	/* 1 - display interrupt binding log */
    205 int	apic_panic_on_nmi = 0;
    206 int	apic_panic_on_apic_error = 0;
    207 
    208 int	apic_verbose = 0;
    209 
    210 /* minimum number of timer ticks to program to */
    211 int apic_min_timer_ticks = 1;
    212 /*
    213  *	Local static data
    214  */
/*
 * PSM operations vector for this module.  The initializer is positional,
 * so the order of the entries must not change.  Slots this module does not
 * implement are filled with typed NULL pointers and labelled with the slot
 * name in the trailing comment.  Note that _init() replaces the gethrtime
 * entry with apic_gettime when apic_coarse_hrtime is set.
 */
static struct	psm_ops apic_ops = {
	apic_probe,

	apic_init,
	apic_picinit,
	apic_intr_enter,
	apic_intr_exit,
	apic_setspl,
	apic_addspl,
	apic_delspl,
	apic_disable_intr,
	apic_enable_intr,
	(int (*)(int))NULL,		/* psm_softlvl_to_irq */
	(void (*)(int))NULL,		/* psm_set_softintr */

	apic_set_idlecpu,
	apic_unset_idlecpu,

	apic_clkinit,
	apic_getclkirq,
	(void (*)(void))NULL,		/* psm_hrtimeinit */
	apic_gethrtime,

	apic_get_next_processorid,
	apic_cpu_start,
	apic_post_cpu_start,
	apic_shutdown,
	apic_get_ipivect,
	apic_send_ipi,

	(int (*)(dev_info_t *, int))NULL,	/* psm_translate_irq */
	(void (*)(int, char *))NULL,	/* psm_notify_error */
	(void (*)(int))NULL,		/* psm_notify_func */
	apic_timer_reprogram,
	apic_timer_enable,
	apic_timer_disable,
	apic_post_cyclic_setup,
	apic_preshutdown,
	apic_intr_ops,			/* Advanced DDI Interrupt framework */
	apic_state,			/* save, restore apic state for S3 */
};
    256 
    257 
/*
 * Module description handed to psm_mod_init(), psm_mod_fini() and
 * psm_mod_info() by the loadable module wrappers; it ties apic_ops to the
 * machine name that apic_probe() passes to apic_probe_common().
 *
 * NOTE(review): the description string says "v1.4" while the version
 * field is PSM_INFO_VER01_6 -- confirm whether the string is intentional.
 */
static struct	psm_info apic_psm_info = {
	PSM_INFO_VER01_6,			/* version */
	PSM_OWN_EXCLUSIVE,			/* ownership */
	(struct psm_ops *)&apic_ops,		/* operation */
	APIC_PCPLUSMP_NAME,			/* machine name */
	"pcplusmp v1.4 compatible",
};
    265 
    266 static void *apic_hdlp;
    267 
    268 #ifdef DEBUG
    269 int	apic_debug = 0;
    270 int	apic_restrict_vector = 0;
    271 
    272 int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
    273 int	apic_debug_msgbufindex = 0;
    274 
    275 #endif /* DEBUG */
    276 
    277 apic_cpus_info_t	*apic_cpus;
    278 
    279 cpuset_t	apic_cpumask;
    280 uint_t	apic_picinit_called;
    281 
    282 /* Flag to indicate that we need to shut down all processors */
    283 static uint_t	apic_shutdown_processors;
    284 
    285 uint_t apic_nsec_per_intr = 0;
    286 
    287 /*
    288  * apic_let_idle_redistribute can have the following values:
    289  * 0 - If clock decremented it from 1 to 0, clock has to call redistribute.
    290  * apic_redistribute_lock prevents multiple idle cpus from redistributing
    291  */
    292 int	apic_num_idle_redistributions = 0;
    293 static	int apic_let_idle_redistribute = 0;
    294 static	uint_t apic_nticks = 0;
    295 static	uint_t apic_skipped_redistribute = 0;
    296 
    297 /* to gather intr data and redistribute */
    298 static void apic_redistribute_compute(void);
    299 
    300 static	uint_t last_count_read = 0;
    301 static	lock_t	apic_gethrtime_lock;
    302 volatile int	apic_hrtime_stamp = 0;
    303 volatile hrtime_t apic_nsec_since_boot = 0;
    304 static uint_t apic_hertz_count;
    305 
    306 uint64_t apic_ticks_per_SFnsecs;	/* # of ticks in SF nsecs */
    307 
    308 static hrtime_t apic_nsec_max;
    309 
    310 static	hrtime_t	apic_last_hrtime = 0;
    311 int		apic_hrtime_error = 0;
    312 int		apic_remote_hrterr = 0;
    313 int		apic_num_nmis = 0;
    314 int		apic_apic_error = 0;
    315 int		apic_num_apic_errors = 0;
    316 int		apic_num_cksum_errors = 0;
    317 
    318 int	apic_error = 0;
    319 static	int	apic_cmos_ssb_set = 0;
    320 
    321 /* use to make sure only one cpu handles the nmi */
    322 static	lock_t	apic_nmi_lock;
    323 /* use to make sure only one cpu handles the error interrupt */
    324 static	lock_t	apic_error_lock;
    325 
/*
 * Byte sequence for the "aspen" BMC: each entry pairs a control code
 * (write start / next / end) with one data byte.  The table holds two
 * messages back to back: a SET_WATCHDOG_TIMER command followed by its six
 * data bytes, then a RESET_WATCHDOG_TIMER command.  The code that sends
 * the table is outside this part of the file.
 */
static	struct {
	uchar_t	cntl;
	uchar_t	data;
} aspen_bmc[] = {
	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */

	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};
    342 
/*
 * Byte sequence for the "sitka" BMC: each entry names the register to
 * write (command or data) and the byte to write there.  As in aspen_bmc[],
 * the table holds a SET_WATCHDOG_TIMER message followed by a
 * RESET_WATCHDOG_TIMER message.  In both messages the write-end command
 * is listed immediately before the final data byte.  The code that sends
 * the table is outside this part of the file.
 */
static	struct {
	int	port;
	uchar_t	data;
} sitka_bmc[] = {
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */

	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};
    363 
    364 /* Patchable global variables. */
    365 int		apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
    366 uint32_t	apic_divide_reg_init = 0;	/* 0 - divide by 2 */
    367 
/*
 * Default apic ops without interrupt remapping.  The initializer is
 * positional: the first five slots are all return_instr, cast to the
 * signature each slot expects (presumably a do-nothing stub -- confirm
 * against its definition); the last two slots are the local
 * apic_record_ioapic_rdt() and apic_record_msi() routines.  apic_vt_ops
 * points at this table by default.
 */
static apic_intrr_ops_t apic_nointrr_ops = {
	(int (*)(int))return_instr,
	(void (*)(int))return_instr,
	(void (*)(apic_irq_t *))return_instr,
	(void (*)(apic_irq_t *, void *))return_instr,
	(void (*)(apic_irq_t *))return_instr,
	apic_record_ioapic_rdt,
	apic_record_msi,
};
    378 
    379 apic_intrr_ops_t *apic_vt_ops = &apic_nointrr_ops;
    380 
    381 /*
    382  *	This is the loadable module wrapper
    383  */
    384 
    385 int
    386 _init(void)
    387 {
    388 	if (apic_coarse_hrtime)
    389 		apic_ops.psm_gethrtime = &apic_gettime;
    390 	return (psm_mod_init(&apic_hdlp, &apic_psm_info));
    391 }
    392 
    393 int
    394 _fini(void)
    395 {
    396 	return (psm_mod_fini(&apic_hdlp, &apic_psm_info));
    397 }
    398 
    399 int
    400 _info(struct modinfo *modinfop)
    401 {
    402 	return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop));
    403 }
    404 
    405 
    406 static int
    407 apic_probe()
    408 {
    409 	return (apic_probe_common(apic_psm_info.p_mach_idstring));
    410 }
    411 
    412 void
    413 apic_init()
    414 {
    415 	int i;
    416 	int	j = 1;
    417 
    418 	apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */
    419 	for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) {
    420 		if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) &&
    421 		    (apic_vectortoipl[i + 1] == apic_vectortoipl[i]))
    422 			/* get to highest vector at the same ipl */
    423 			continue;
    424 		for (; j <= apic_vectortoipl[i]; j++) {
    425 			apic_ipltopri[j] = (i << APIC_IPL_SHIFT) +
    426 			    APIC_BASE_VECT;
    427 		}
    428 	}
    429 	for (; j < MAXIPL + 1; j++)
    430 		/* fill up any empty ipltopri slots */
    431 		apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT;
    432 	apic_init_common();
    433 #if defined(__amd64)
    434 	/*
    435 	 * Make cpu-specific interrupt info point to cr8pri vector
    436 	 */
    437 	for (i = 0; i <= MAXIPL; i++)
    438 		apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT;
    439 	CPU->cpu_pri_data = apic_cr8pri;
    440 #else
    441 	if (cpuid_have_cr8access(CPU))
    442 		apic_have_32bit_cr8 = 1;
    443 #endif	/* __amd64 */
    444 }
    445 
    446 /*
    447  * handler for APIC Error interrupt. Just print a warning and continue
    448  */
    449 static int
    450 apic_error_intr()
    451 {
    452 	uint_t	error0, error1, error;
    453 	uint_t	i;
    454 
    455 	/*
    456 	 * We need to write before read as per 7.4.17 of system prog manual.
    457 	 * We do both and or the results to be safe
    458 	 */
    459 	error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
    460 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
    461 	error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
    462 	error = error0 | error1;
    463 
    464 	/*
    465 	 * Clear the APIC error status (do this on all cpus that enter here)
    466 	 * (two writes are required due to the semantics of accessing the
    467 	 * error status register.)
    468 	 */
    469 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
    470 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
    471 
    472 	/*
    473 	 * Prevent more than 1 CPU from handling error interrupt causing
    474 	 * double printing (interleave of characters from multiple
    475 	 * CPU's when using prom_printf)
    476 	 */
    477 	if (lock_try(&apic_error_lock) == 0)
    478 		return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
    479 	if (error) {
    480 #if	DEBUG
    481 		if (apic_debug)
    482 			debug_enter("pcplusmp: APIC Error interrupt received");
    483 #endif /* DEBUG */
    484 		if (apic_panic_on_apic_error)
    485 			cmn_err(CE_PANIC,
    486 			    "APIC Error interrupt on CPU %d. Status = %x\n",
    487 			    psm_get_cpu_id(), error);
    488 		else {
    489 			if ((error & ~APIC_CS_ERRORS) == 0) {
    490 				/* cksum error only */
    491 				apic_error |= APIC_ERR_APIC_ERROR;
    492 				apic_apic_error |= error;
    493 				apic_num_apic_errors++;
    494 				apic_num_cksum_errors++;
    495 			} else {
    496 				/*
    497 				 * prom_printf is the best shot we have of
    498 				 * something which is problem free from
    499 				 * high level/NMI type of interrupts
    500 				 */
    501 				prom_printf("APIC Error interrupt on CPU %d. "
    502 				    "Status 0 = %x, Status 1 = %x\n",
    503 				    psm_get_cpu_id(), error0, error1);
    504 				apic_error |= APIC_ERR_APIC_ERROR;
    505 				apic_apic_error |= error;
    506 				apic_num_apic_errors++;
    507 				for (i = 0; i < apic_error_display_delay; i++) {
    508 					tenmicrosec();
    509 				}
    510 				/*
    511 				 * provide more delay next time limited to
    512 				 * roughly 1 clock tick time
    513 				 */
    514 				if (apic_error_display_delay < 500)
    515 					apic_error_display_delay *= 2;
    516 			}
    517 		}
    518 		lock_clear(&apic_error_lock);
    519 		return (DDI_INTR_CLAIMED);
    520 	} else {
    521 		lock_clear(&apic_error_lock);
    522 		return (DDI_INTR_UNCLAIMED);
    523 	}
    524 	/* NOTREACHED */
    525 }
    526 
    527 /*
    528  * Turn off the mask bit in the performance counter Local Vector Table entry.
    529  */
    530 static void
    531 apic_cpcovf_mask_clear(void)
    532 {
    533 	apic_reg_ops->apic_write(APIC_PCINT_VECT,
    534 	    (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
    535 }
    536 
    537 /*ARGSUSED*/
    538 static int
    539 apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
    540 {
    541 	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
    542 	return (0);
    543 }
    544 
    545 /*ARGSUSED*/
    546 static int
    547 apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
    548 {
    549 	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
    550 	return (0);
    551 }
    552 
    553 /*ARGSUSED*/
    554 static int
    555 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
    556 {
    557 	cpuset_t	cpu_set;
    558 
    559 	CPUSET_ONLY(cpu_set, cpuid);
    560 
    561 	switch (what) {
    562 		case CPU_ON:
    563 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
    564 			    (xc_func_t)apic_cmci_enable);
    565 			break;
    566 
    567 		case CPU_OFF:
    568 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
    569 			    (xc_func_t)apic_cmci_disable);
    570 			break;
    571 
    572 		default:
    573 			break;
    574 	}
    575 
    576 	return (0);
    577 }
    578 
    579 static void
    580 apic_init_intr()
    581 {
    582 	processorid_t	cpun = psm_get_cpu_id();
    583 	uint_t nlvt;
    584 	uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;
    585 
    586 	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
    587 
    588 	if (apic_mode == LOCAL_APIC) {
    589 		/*
    590 		 * We are running APIC in MMIO mode.
    591 		 */
    592 		if (apic_flat_model) {
    593 			apic_reg_ops->apic_write(APIC_FORMAT_REG,
    594 			    APIC_FLAT_MODEL);
    595 		} else {
    596 			apic_reg_ops->apic_write(APIC_FORMAT_REG,
    597 			    APIC_CLUSTER_MODEL);
    598 		}
    599 
    600 		apic_reg_ops->apic_write(APIC_DEST_REG,
    601 		    AV_HIGH_ORDER >> cpun);
    602 	}
    603 
    604 	if (apic_directed_EOI_supported()) {
    605 		/*
    606 		 * Setting the 12th bit in the Spurious Interrupt Vector
    607 		 * Register suppresses broadcast EOIs generated by the local
    608 		 * APIC. The suppression of broadcast EOIs happens only when
    609 		 * interrupts are level-triggered.
    610 		 */
    611 		svr |= APIC_SVR_SUPPRESS_BROADCAST_EOI;
    612 	}
    613 
    614 	/* need to enable APIC before unmasking NMI */
    615 	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, svr);
    616 
    617 	/*
    618 	 * Presence of an invalid vector with delivery mode AV_FIXED can
    619 	 * cause an error interrupt, even if the entry is masked...so
    620 	 * write a valid vector to LVT entries along with the mask bit
    621 	 */
    622 
    623 	/* All APICs have timer and LINT0/1 */
    624 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK|APIC_RESV_IRQ);
    625 	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK|APIC_RESV_IRQ);
    626 	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_NMI);	/* enable NMI */
    627 
    628 	/*
    629 	 * On integrated APICs, the number of LVT entries is
    630 	 * 'Max LVT entry' + 1; on 82489DX's (non-integrated
    631 	 * APICs), nlvt is "3" (LINT0, LINT1, and timer)
    632 	 */
    633 
    634 	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
    635 		nlvt = 3;
    636 	} else {
    637 		nlvt = ((apic_reg_ops->apic_read(APIC_VERS_REG) >> 16) &
    638 		    0xFF) + 1;
    639 	}
    640 
    641 	if (nlvt >= 5) {
    642 		/* Enable performance counter overflow interrupt */
    643 
    644 		if ((x86_feature & X86_MSR) != X86_MSR)
    645 			apic_enable_cpcovf_intr = 0;
    646 		if (apic_enable_cpcovf_intr) {
    647 			if (apic_cpcovf_vect == 0) {
    648 				int ipl = APIC_PCINT_IPL;
    649 				int irq = apic_get_ipivect(ipl, -1);
    650 
    651 				ASSERT(irq != -1);
    652 				apic_cpcovf_vect =
    653 				    apic_irq_table[irq]->airq_vector;
    654 				ASSERT(apic_cpcovf_vect);
    655 				(void) add_avintr(NULL, ipl,
    656 				    (avfunc)kcpc_hw_overflow_intr,
    657 				    "apic pcint", irq, NULL, NULL, NULL, NULL);
    658 				kcpc_hw_overflow_intr_installed = 1;
    659 				kcpc_hw_enable_cpc_intr =
    660 				    apic_cpcovf_mask_clear;
    661 			}
    662 			apic_reg_ops->apic_write(APIC_PCINT_VECT,
    663 			    apic_cpcovf_vect);
    664 		}
    665 	}
    666 
    667 	if (nlvt >= 6) {
    668 		/* Only mask TM intr if the BIOS apparently doesn't use it */
    669 
    670 		uint32_t lvtval;
    671 
    672 		lvtval = apic_reg_ops->apic_read(APIC_THERM_VECT);
    673 		if (((lvtval & AV_MASK) == AV_MASK) ||
    674 		    ((lvtval & AV_DELIV_MODE) != AV_SMI)) {
    675 			apic_reg_ops->apic_write(APIC_THERM_VECT,
    676 			    AV_MASK|APIC_RESV_IRQ);
    677 		}
    678 	}
    679 
    680 	/* Enable error interrupt */
    681 
    682 	if (nlvt >= 4 && apic_enable_error_intr) {
    683 		if (apic_errvect == 0) {
    684 			int ipl = 0xf;	/* get highest priority intr */
    685 			int irq = apic_get_ipivect(ipl, -1);
    686 
    687 			ASSERT(irq != -1);
    688 			apic_errvect = apic_irq_table[irq]->airq_vector;
    689 			ASSERT(apic_errvect);
    690 			/*
    691 			 * Not PSMI compliant, but we are going to merge
    692 			 * with ON anyway
    693 			 */
    694 			(void) add_avintr((void *)NULL, ipl,
    695 			    (avfunc)apic_error_intr, "apic error intr",
    696 			    irq, NULL, NULL, NULL, NULL);
    697 		}
    698 		apic_reg_ops->apic_write(APIC_ERR_VECT, apic_errvect);
    699 		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
    700 		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
    701 	}
    702 
    703 	/* Enable CMCI interrupt */
    704 	if (cmi_enable_cmci) {
    705 
    706 		mutex_enter(&cmci_cpu_setup_lock);
    707 		if (cmci_cpu_setup_registered == 0) {
    708 			mutex_enter(&cpu_lock);
    709 			register_cpu_setup_func(cmci_cpu_setup, NULL);
    710 			mutex_exit(&cpu_lock);
    711 			cmci_cpu_setup_registered = 1;
    712 		}
    713 		mutex_exit(&cmci_cpu_setup_lock);
    714 
    715 		if (apic_cmci_vect == 0) {
    716 			int ipl = 0x2;
    717 			int irq = apic_get_ipivect(ipl, -1);
    718 
    719 			ASSERT(irq != -1);
    720 			apic_cmci_vect = apic_irq_table[irq]->airq_vector;
    721 			ASSERT(apic_cmci_vect);
    722 
    723 			(void) add_avintr(NULL, ipl,
    724 			    (avfunc)cmi_cmci_trap,
    725 			    "apic cmci intr", irq, NULL, NULL, NULL, NULL);
    726 		}
    727 		apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
    728 	}
    729 }
    730 
    731 static void
    732 apic_disable_local_apic()
    733 {
    734 	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
    735 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);
    736 
    737 	/* local intr reg 0 */
    738 	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);
    739 
    740 	/* disable NMI */
    741 	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);
    742 
    743 	/* and error interrupt */
    744 	apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);
    745 
    746 	/* and perf counter intr */
    747 	apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);
    748 
    749 	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
    750 }
    751 
    752 static void
    753 apic_picinit(void)
    754 {
    755 	int i, j;
    756 	uint_t isr;
    757 
    758 	/*
    759 	 * initialize interrupt remapping before apic
    760 	 * hardware initialization
    761 	 */
    762 	apic_intrr_init(apic_mode);
    763 
    764 	/*
    765 	 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
    766 	 * bit on without clearing it with EOI.  Since softint
    767 	 * uses vector 0x20 to interrupt itself, so softint will
    768 	 * not work on this machine.  In order to fix this problem
    769 	 * a check is made to verify all the isr bits are clear.
    770 	 * If not, EOIs are issued to clear the bits.
    771 	 */
    772 	for (i = 7; i >= 1; i--) {
    773 		isr = apic_reg_ops->apic_read(APIC_ISR_REG + (i * 4));
    774 		if (isr != 0)
    775 			for (j = 0; ((j < 32) && (isr != 0)); j++)
    776 				if (isr & (1 << j)) {
    777 					apic_reg_ops->apic_write(
    778 					    APIC_EOI_REG, 0);
    779 					isr &= ~(1 << j);
    780 					apic_error |= APIC_ERR_BOOT_EOI;
    781 				}
    782 	}
    783 
    784 	/* set a flag so we know we have run apic_picinit() */
    785 	apic_picinit_called = 1;
    786 	LOCK_INIT_CLEAR(&apic_gethrtime_lock);
    787 	LOCK_INIT_CLEAR(&apic_ioapic_lock);
    788 	LOCK_INIT_CLEAR(&apic_error_lock);
    789 
    790 	picsetup();	 /* initialise the 8259 */
    791 
    792 	/* add nmi handler - least priority nmi handler */
    793 	LOCK_INIT_CLEAR(&apic_nmi_lock);
    794 
    795 	if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
    796 	    "pcplusmp NMI handler", (caddr_t)NULL))
    797 		cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler");
    798 
    799 	apic_init_intr();
    800 
    801 	/* enable apic mode if imcr present */
    802 	if (apic_imcrp) {
    803 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
    804 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
    805 	}
    806 
    807 	ioapic_init_intr(IOAPIC_MASK);
    808 }
    809 
    810 
    811 /*ARGSUSED1*/
    812 static int
    813 apic_cpu_start(processorid_t cpun, caddr_t arg)
    814 {
    815 	int		loop_count;
    816 	uint32_t	vector;
    817 	uint_t		cpu_id;
    818 	ulong_t		iflag;
    819 
    820 	cpu_id =  apic_cpus[cpun].aci_local_id;
    821 
    822 	apic_cmos_ssb_set = 1;
    823 
    824 	/*
    825 	 * Interrupts on BSP cpu will be disabled during these startup
    826 	 * steps in order to avoid unwanted side effects from
    827 	 * executing interrupt handlers on a problematic BIOS.
    828 	 */
    829 
    830 	iflag = intr_clear();
    831 	outb(CMOS_ADDR, SSB);
    832 	outb(CMOS_DATA, BIOS_SHUTDOWN);
    833 
    834 	/*
    835 	 * According to X2APIC specification in section '2.3.5.1' of
    836 	 * Interrupt Command Register Semantics, the semantics of
    837 	 * programming the Interrupt Command Register to dispatch an interrupt
    838 	 * is simplified. A single MSR write to the 64-bit ICR is required
    839 	 * for dispatching an interrupt. Specifically, with the 64-bit MSR
    840 	 * interface to ICR, system software is not required to check the
    841 	 * status of the delivery status bit prior to writing to the ICR
    842 	 * to send an IPI. With the removal of the Delivery Status bit,
    843 	 * system software no longer has a reason to read the ICR. It remains
    844 	 * readable only to aid in debugging.
    845 	 */
    846 #ifdef	DEBUG
    847 	APIC_AV_PENDING_SET();
    848 #else
    849 	if (apic_mode == LOCAL_APIC) {
    850 		APIC_AV_PENDING_SET();
    851 	}
    852 #endif /* DEBUG */
    853 
    854 	/* for integrated - make sure there is one INIT IPI in buffer */
    855 	/* for external - it will wake up the cpu */
    856 	apic_reg_ops->apic_write_int_cmd(cpu_id, AV_ASSERT | AV_RESET);
    857 
    858 	/* If only 1 CPU is installed, PENDING bit will not go low */
    859 	for (loop_count = 0x1000; loop_count; loop_count--) {
    860 		if (apic_mode == LOCAL_APIC &&
    861 		    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
    862 			apic_ret();
    863 		else
    864 			break;
    865 	}
    866 
    867 	apic_reg_ops->apic_write_int_cmd(cpu_id, AV_DEASSERT | AV_RESET);
    868 
    869 	drv_usecwait(20000);		/* 20 milli sec */
    870 
    871 	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
    872 		/* integrated apic */
    873 
    874 		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
    875 		    (APIC_VECTOR_MASK | APIC_IPL_MASK);
    876 
    877 		/* to offset the INIT IPI queue up in the buffer */
    878 		apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);
    879 
    880 		drv_usecwait(200);		/* 20 micro sec */
    881 
    882 		apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);
    883 
    884 		drv_usecwait(200);		/* 20 micro sec */
    885 	}
    886 	intr_restore(iflag);
    887 	return (0);
    888 }
    889 
    890 
#ifdef	DEBUG
/*
 * DEBUG-only knobs consulted at the end of apic_intr_enter().
 */
/* apic_break() is called for interrupts taken on the cpu with this id */
int	apic_break_on_cpu = 9;
/* when non-zero, passed to drv_usecwait() to stretch selected interrupts */
int	apic_stretch_interrupts = 0;
/* bitmask, bit n set == stretch interrupts whose new ipl is n */
int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */

/*
 * Empty function.  Presumably exists only as a place to hang a debugger
 * breakpoint; it has no effect of its own.
 */
void
apic_break()
{
}
#endif /* DEBUG */
    901 
/*
 * platform_intr_enter
 *
 *	Called at the beginning of the interrupt service routine to
 *	mask all level equal to and below the interrupt priority
 *	of the interrupting vector.  An EOI should be given to
 *	the interrupt controller to enable other HW interrupts.
 *
 *	ipl is not used.  On entry *vectorp holds the delivered vector
 *	less APIC_BASE_VECT; on every non-spurious return it has been
 *	overwritten with the irq found in apic_vector_to_irq[].
 *
 *	Returns the ipl recorded in apic_ipls[] for the vector.
 *	Return -1 (APIC_INT_SPURIOUS) for spurious interrupts
 *
 */
/*ARGSUSED*/
static int
apic_intr_enter(int ipl, int *vectorp)
{
	uchar_t vector;
	int nipl;
	int irq;
	ulong_t iflag;
	apic_cpus_info_t *cpu_infop;

	/*
	 * The real vector delivered is (*vectorp + 0x20), but our caller
	 * subtracts 0x20 from the vector before passing it to us.
	 * (That's why APIC_BASE_VECT is 0x20.)
	 */
	vector = (uchar_t)*vectorp;

	/* if interrupted by the clock, increment apic_nsec_since_boot */
	if (vector == apic_clkvect) {
		if (!apic_oneshot) {
			/* NOTE: this is not MT aware */
			/*
			 * apic_hrtime_stamp is bumped before and after the
			 * update, so it is odd only while the update is in
			 * flight.  apic_gettime() and apic_gethrtime() spin
			 * while it is odd and retry if it changed.
			 */
			apic_hrtime_stamp++;
			apic_nsec_since_boot += apic_nsec_per_intr;
			apic_hrtime_stamp++;
			last_count_read = apic_hertz_count;
			apic_redistribute_compute();
		}

		/* We will avoid all the book keeping overhead for clock */
		nipl = apic_ipls[vector];

		*vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT];
		/* raise the task priority for nipl, then EOI right away */
		if (apic_mode == LOCAL_APIC) {
#if defined(__amd64)
			setcr8((ulong_t)(apic_ipltopri[nipl] >>
			    APIC_IPL_SHIFT));
#else
			if (apic_have_32bit_cr8)
				setcr8((ulong_t)(apic_ipltopri[nipl] >>
				    APIC_IPL_SHIFT));
			else
				LOCAL_APIC_WRITE_REG(APIC_TASK_REG,
				    (uint32_t)apic_ipltopri[nipl]);
#endif
			LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
		} else {
			X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[nipl]);
			X2APIC_WRITE(APIC_EOI_REG, 0);
		}

		return (nipl);
	}

	cpu_infop = &apic_cpus[psm_get_cpu_id()];

	/* spurious: count it; *vectorp and the task priority are untouched */
	if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) {
		cpu_infop->aci_spur_cnt++;
		return (APIC_INT_SPURIOUS);
	}

	/* Check if the vector we got is really what we need */
	if (apic_revector_pending) {
		/*
		 * Disable interrupts for the duration of
		 * the vector translation to prevent a self-race for
		 * the apic_revector_lock.  This cannot be done
		 * in apic_xlate_vector because it is recursive and
		 * we want the vector translation to be atomic with
		 * respect to other (higher-priority) interrupts.
		 */
		iflag = intr_clear();
		vector = apic_xlate_vector(vector + APIC_BASE_VECT) -
		    APIC_BASE_VECT;
		intr_restore(iflag);
	}

	nipl = apic_ipls[vector];
	*vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT];

	/* raise the task priority to the level that corresponds to nipl */
	if (apic_mode == LOCAL_APIC) {
#if defined(__amd64)
		setcr8((ulong_t)(apic_ipltopri[nipl] >> APIC_IPL_SHIFT));
#else
		if (apic_have_32bit_cr8)
			setcr8((ulong_t)(apic_ipltopri[nipl] >>
			    APIC_IPL_SHIFT));
		else
			LOCAL_APIC_WRITE_REG(APIC_TASK_REG,
			    (uint32_t)apic_ipltopri[nipl]);
#endif
	} else {
		X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[nipl]);
	}

	/* per-cpu bookkeeping: irq being serviced at nipl, and current ipl */
	cpu_infop->aci_current[nipl] = (uchar_t)irq;
	cpu_infop->aci_curipl = (uchar_t)nipl;
	cpu_infop->aci_ISR_in_progress |= 1 << nipl;

	/*
	 * apic_level_intr could have been assimilated into the irq struct.
	 * but, having it as a character array is more efficient in terms of
	 * cache usage. So, we leave it as is.
	 */
	/*
	 * Only irqs not flagged in apic_level_intr[] get their EOI here;
	 * for flagged irqs the EOI is sent at interrupt exit (see
	 * APIC_INTR_EXIT).
	 */
	if (!apic_level_intr[irq]) {
		if (apic_mode == LOCAL_APIC) {
			LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
		} else {
			X2APIC_WRITE(APIC_EOI_REG, 0);
		}
	}

#ifdef	DEBUG
	APIC_DEBUG_BUF_PUT(vector);
	APIC_DEBUG_BUF_PUT(irq);
	APIC_DEBUG_BUF_PUT(nipl);
	APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
	/* optional artificial delay for the ipls selected in the mask */
	if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
		drv_usecwait(apic_stretch_interrupts);

	if (apic_break_on_cpu == psm_get_cpu_id())
		apic_break();
#endif /* DEBUG */
	return (nipl);
}
   1037 
/*
 * This macro is a common code used by MMIO local apic and X2APIC
 * local apic.
 *
 * It expands to a brace-enclosed block and relies on three names being
 * in scope where it is used: `cpu_infop' (assigned here), `irq' and
 * `prev_ipl'.  It sends the EOI for irqs flagged in apic_level_intr[],
 * records prev_ipl as the cpu's current ipl, and clears every
 * aci_ISR_in_progress bit above bit prev_ipl.
 */
#define	APIC_INTR_EXIT() \
{ \
	cpu_infop = &apic_cpus[psm_get_cpu_id()]; \
	if (apic_level_intr[irq]) \
		apic_reg_ops->apic_send_eoi(irq); \
	cpu_infop->aci_curipl = (uchar_t)prev_ipl; \
	/* ISR above current pri could not be in progress */ \
	cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1; \
}
   1051 
   1052 /*
   1053  * Any changes made to this function must also change X2APIC
   1054  * version of intr_exit.
   1055  */
   1056 void
   1057 apic_intr_exit(int prev_ipl, int irq)
   1058 {
   1059 	apic_cpus_info_t *cpu_infop;
   1060 
   1061 #if defined(__amd64)
   1062 	setcr8((ulong_t)apic_cr8pri[prev_ipl]);
   1063 #else
   1064 	if (apic_have_32bit_cr8)
   1065 		setcr8((ulong_t)(apic_ipltopri[prev_ipl] >> APIC_IPL_SHIFT));
   1066 	else
   1067 		apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl];
   1068 #endif
   1069 
   1070 	APIC_INTR_EXIT();
   1071 }
   1072 
/*
 * Same as apic_intr_exit() except it uses MSR rather than MMIO
 * to access local apic registers.
 *
 * prev_ipl is the ipl being returned to, irq the interrupt that has
 * just been serviced.
 */
void
x2apic_intr_exit(int prev_ipl, int irq)
{
	/* assigned and used inside APIC_INTR_EXIT() */
	apic_cpus_info_t *cpu_infop;

	/* lower the task priority first, then do the common bookkeeping */
	X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[prev_ipl]);
	APIC_INTR_EXIT();
}
   1085 
   1086 intr_exit_fn_t
   1087 psm_intr_exit_fn(void)
   1088 {
   1089 	if (apic_mode == LOCAL_X2APIC)
   1090 		return (x2apic_intr_exit);
   1091 
   1092 	return (apic_intr_exit);
   1093 }
   1094 
   1095 /*
   1096  * Mask all interrupts below or equal to the given IPL.
   1097  * Any changes made to this function must also change X2APIC
   1098  * version of setspl.
   1099  */
   1100 static void
   1101 apic_setspl(int ipl)
   1102 {
   1103 #if defined(__amd64)
   1104 	setcr8((ulong_t)apic_cr8pri[ipl]);
   1105 #else
   1106 	if (apic_have_32bit_cr8)
   1107 		setcr8((ulong_t)(apic_ipltopri[ipl] >> APIC_IPL_SHIFT));
   1108 	else
   1109 		apicadr[APIC_TASK_REG] = apic_ipltopri[ipl];
   1110 #endif
   1111 
   1112 	/* interrupts at ipl above this cannot be in progress */
   1113 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
   1114 	/*
   1115 	 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts
   1116 	 * have enough time to come in before the priority is raised again
   1117 	 * during the idle() loop.
   1118 	 */
   1119 	if (apic_setspl_delay)
   1120 		(void) apic_reg_ops->apic_get_pri();
   1121 }
   1122 
   1123 /*
   1124  * X2APIC version of setspl.
   1125  * Mask all interrupts below or equal to the given IPL
   1126  */
   1127 static void
   1128 x2apic_setspl(int ipl)
   1129 {
   1130 	X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[ipl]);
   1131 
   1132 	/* interrupts at ipl above this cannot be in progress */
   1133 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
   1134 }
   1135 
   1136 /*
   1137  * generates an interprocessor interrupt to another CPU. Any changes made to
   1138  * this routine must be accompanied by similar changes to
   1139  * apic_common_send_ipi().
   1140  */
   1141 static void
   1142 apic_send_ipi(int cpun, int ipl)
   1143 {
   1144 	int vector;
   1145 	ulong_t flag;
   1146 
   1147 	vector = apic_resv_vector[ipl];
   1148 
   1149 	ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
   1150 
   1151 	flag = intr_clear();
   1152 
   1153 	APIC_AV_PENDING_SET();
   1154 
   1155 	apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
   1156 	    vector);
   1157 
   1158 	intr_restore(flag);
   1159 }
   1160 
   1161 
/*
 * Intentionally empty: nothing is done here for cpu `cpun'.
 */
/*ARGSUSED*/
static void
apic_set_idlecpu(processorid_t cpun)
{
}
   1167 
/*
 * Intentionally empty counterpart of apic_set_idlecpu().
 */
/*ARGSUSED*/
static void
apic_unset_idlecpu(processorid_t cpun)
{
}
   1173 
   1174 
/*
 * Empty function.  It is used as the body of the busy-wait loops in this
 * file (e.g. while waiting for apic_hrtime_stamp to become even).
 */
void
apic_ret()
{
}
   1179 
   1180 /*
   1181  * If apic_coarse_time == 1, then apic_gettime() is used instead of
   1182  * apic_gethrtime().  This is used for performance instead of accuracy.
   1183  */
   1184 
   1185 static hrtime_t
   1186 apic_gettime()
   1187 {
   1188 	int old_hrtime_stamp;
   1189 	hrtime_t temp;
   1190 
   1191 	/*
   1192 	 * In one-shot mode, we do not keep time, so if anyone
   1193 	 * calls psm_gettime() directly, we vector over to
   1194 	 * gethrtime().
   1195 	 * one-shot mode MUST NOT be enabled if this psm is the source of
   1196 	 * hrtime.
   1197 	 */
   1198 
   1199 	if (apic_oneshot)
   1200 		return (gethrtime());
   1201 
   1202 
   1203 gettime_again:
   1204 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
   1205 		apic_ret();
   1206 
   1207 	temp = apic_nsec_since_boot;
   1208 
   1209 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
   1210 		goto gettime_again;
   1211 	}
   1212 	return (temp);
   1213 }
   1214 
/*
 * Here we return the number of nanoseconds since booting.  Note every
 * clock interrupt increments apic_nsec_since_boot by the appropriate
 * amount.
 *
 * The result is apic_nsec_since_boot plus the time represented by the
 * ticks consumed so far in the current timer period.  The value returned
 * never goes below apic_last_hrtime; when the computation would go
 * backwards, or the remote read of CPU 0's counter fails, the last value
 * returned is handed back again and an error counter is bumped.
 */
static hrtime_t
apic_gethrtime()
{
	int curr_timeval, countval, elapsed_ticks;
	int old_hrtime_stamp, status;
	hrtime_t temp;
	uint32_t cpun;
	ulong_t oflags;

	/*
	 * In one-shot mode, we do not keep time, so if anyone
	 * calls psm_gethrtime() directly, we vector over to
	 * gethrtime().
	 * one-shot mode MUST NOT be enabled if this psm is the source of
	 * hrtime.
	 */

	if (apic_oneshot)
		return (gethrtime());

	oflags = intr_clear();	/* prevent migration */

	/* local apic id of the cpu we are on; xAPIC mode needs the shift */
	cpun = apic_reg_ops->apic_read(APIC_LID_REG);
	if (apic_mode == LOCAL_APIC)
		cpun >>= APIC_ID_BIT_OFFSET;

	lock_set(&apic_gethrtime_lock);

gethrtime_again:
	/* wait until the clock handler is not mid-update (stamp is even) */
	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
		apic_ret();

	/*
	 * Check to see which CPU we are on.  Note the time is kept on
	 * the local APIC of CPU 0.  If on CPU 0, simply read the current
	 * counter.  If on another CPU, issue a remote read command to CPU 0.
	 */
	if (cpun == apic_cpus[0].aci_local_id) {
		countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
	} else {
#ifdef	DEBUG
		APIC_AV_PENDING_SET();
#else
		if (apic_mode == LOCAL_APIC)
			APIC_AV_PENDING_SET();
#endif /* DEBUG */

		apic_reg_ops->apic_write_int_cmd(
		    apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);

		/* spin until the remote read is no longer pending */
		while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
		    & AV_READ_PENDING) {
			apic_ret();
		}

		if (status & AV_REMOTE_STATUS)	/* 1 = valid */
			countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
		else {	/* 0 = invalid */
			apic_remote_hrterr++;
			/*
			 * return last hrtime right now, will need more
			 * testing if change to retry
			 */
			temp = apic_last_hrtime;

			lock_clear(&apic_gethrtime_lock);

			intr_restore(oflags);

			return (temp);
		}
	}
	/*
	 * A count above the last one read is treated as 0, i.e. as a fully
	 * elapsed period.  NOTE(review): presumably this covers the counter
	 * having reloaded before the clock interrupt was handled -- confirm.
	 */
	if (countval > last_count_read)
		countval = 0;
	else
		last_count_read = countval;

	/* ticks used up so far out of the apic_hertz_count in one period */
	elapsed_ticks = apic_hertz_count - countval;

	curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
	temp = apic_nsec_since_boot + curr_timeval;

	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
		/* we might have clobbered last_count_read. Restore it */
		last_count_read = apic_hertz_count;
		goto gethrtime_again;
	}

	if (temp < apic_last_hrtime) {
		/* return last hrtime if error occurs */
		apic_hrtime_error++;
		temp = apic_last_hrtime;
	}
	else
		apic_last_hrtime = temp;

	lock_clear(&apic_gethrtime_lock);
	intr_restore(oflags);

	return (temp);
}
   1321 
   1322 /* apic NMI handler */
   1323 /*ARGSUSED*/
   1324 static void
   1325 apic_nmi_intr(caddr_t arg, struct regs *rp)
   1326 {
   1327 	if (apic_shutdown_processors) {
   1328 		apic_disable_local_apic();
   1329 		return;
   1330 	}
   1331 
   1332 	apic_error |= APIC_ERR_NMI;
   1333 
   1334 	if (!lock_try(&apic_nmi_lock))
   1335 		return;
   1336 	apic_num_nmis++;
   1337 
   1338 	if (apic_kmdb_on_nmi && psm_debugger()) {
   1339 		debug_enter("NMI received: entering kmdb\n");
   1340 	} else if (apic_panic_on_nmi) {
   1341 		/* Keep panic from entering kmdb. */
   1342 		nopanicdebug = 1;
   1343 		panic("NMI received\n");
   1344 	} else {
   1345 		/*
   1346 		 * prom_printf is the best shot we have of something which is
   1347 		 * problem free from high level/NMI type of interrupts
   1348 		 */
   1349 		prom_printf("NMI received\n");
   1350 	}
   1351 
   1352 	lock_clear(&apic_nmi_lock);
   1353 }
   1354 
/*
 * Thin wrapper: passes all four arguments straight through to
 * apic_addspl_common() and returns its result.
 */
/*ARGSUSED*/
static int
apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
	return (apic_addspl_common(irqno, ipl, min_ipl, max_ipl));
}
   1361 
/*
 * Thin wrapper: passes all four arguments straight through to
 * apic_delspl_common() and returns its result.
 */
static int
apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
	return (apic_delspl_common(irqno, ipl, min_ipl,  max_ipl));
}
   1367 
/*
 * Per-cpu apic setup, run on a cpu that has just been started (see the
 * AP/BSP notes below): switches the cpu to X2APIC mode if required,
 * initialises its local apic interrupts, enables its caches, marks it
 * APIC_CPU_ONLINE and programs the timer divide register.
 *
 * Always returns PSM_SUCCESS.
 */
static int
apic_post_cpu_start()
{
	int cpun;
	/* starts at 1 so that the count includes the BSP */
	static int cpus_started = 1;
	struct psm_ops *pops = &apic_ops;

	/* We know this CPU + BSP  started successfully. */
	cpus_started++;

	/*
	 * On BSP we would have enabled X2APIC, if supported by processor,
	 * in acpi_probe(), but on AP we do it here.
	 *
	 * We enable X2APIC mode only if BSP is running in X2APIC & the
	 * local APIC mode of the current CPU is MMIO (xAPIC).
	 */
	if (apic_mode == LOCAL_X2APIC && apic_detect_x2apic() &&
	    apic_local_mode() == LOCAL_APIC) {
		apic_enable_x2apic();
	}

	/*
	 * We change psm_send_ipi and send_dirintf only if Solaris
	 * is booted in kmdb & the current CPU is the last CPU being
	 * brought up. We don't need to do anything if Solaris is running
	 * in MMIO mode (xAPIC).
	 */
	if ((boothowto & RB_DEBUG) &&
	    (cpus_started == boot_ncpus || cpus_started == apic_nproc) &&
	    apic_mode == LOCAL_X2APIC) {
		/*
		 * We no longer need help from apic_common_send_ipi()
		 * since we will not start any more CPUs.
		 *
		 * We will need to revisit this if we start supporting
		 * hot-plugging of CPUs.
		 */
		pops->psm_send_ipi = x2apic_send_ipi;
		send_dirintf = pops->psm_send_ipi;
	}

	/* run the local apic interrupt setup at LOCK_LEVEL */
	splx(ipltospl(LOCK_LEVEL));
	apic_init_intr();

	/*
	 * since some systems don't enable the internal cache on the non-boot
	 * cpus, so we have to enable them here
	 */
	setcr0(getcr0() & ~(CR0_CD | CR0_NW));

#ifdef	DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC)
		APIC_AV_PENDING_SET();
#endif	/* DEBUG */

	/*
	 * We may be booting, or resuming from suspend; aci_status will
	 * be APIC_CPU_INTR_ENABLE if coming from suspend, so we add the
	 * APIC_CPU_ONLINE flag here rather than setting aci_status completely.
	 */
	cpun = psm_get_cpu_id();
	apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;

	apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
	return (PSM_SUCCESS);
}
   1437 
   1438 processorid_t
   1439 apic_get_next_processorid(processorid_t cpu_id)
   1440 {
   1441 
   1442 	int i;
   1443 
   1444 	if (cpu_id == -1)
   1445 		return ((processorid_t)0);
   1446 
   1447 	for (i = cpu_id + 1; i < NCPU; i++) {
   1448 		if (CPU_IN_SET(apic_cpumask, i))
   1449 			return (i);
   1450 	}
   1451 
   1452 	return ((processorid_t)-1);
   1453 }
   1454 
   1455 
   1456 /*
   1457  * type == -1 indicates it is an internal request. Do not change
   1458  * resv_vector for these requests
   1459  */
   1460 static int
   1461 apic_get_ipivect(int ipl, int type)
   1462 {
   1463 	uchar_t vector;
   1464 	int irq;
   1465 
   1466 	if (irq = apic_allocate_irq(APIC_VECTOR(ipl))) {
   1467 		if (vector = apic_allocate_vector(ipl, irq, 1)) {
   1468 			apic_irq_table[irq]->airq_mps_intr_index =
   1469 			    RESERVE_INDEX;
   1470 			apic_irq_table[irq]->airq_vector = vector;
   1471 			if (type != -1) {
   1472 				apic_resv_vector[ipl] = vector;
   1473 			}
   1474 			return (irq);
   1475 		}
   1476 	}
   1477 	apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
   1478 	return (-1);	/* shouldn't happen */
   1479 }
   1480 
   1481 static int
   1482 apic_getclkirq(int ipl)
   1483 {
   1484 	int	irq;
   1485 
   1486 	if ((irq = apic_get_ipivect(ipl, -1)) == -1)
   1487 		return (-1);
   1488 	/*
   1489 	 * Note the vector in apic_clkvect for per clock handling.
   1490 	 */
   1491 	apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT;
   1492 	APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n",
   1493 	    apic_clkvect));
   1494 	return (irq);
   1495 }
   1496 
   1497 
/*
 * Return the number of APIC clock ticks elapsed for 8254 to decrement
 * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
 *
 * addr is used only to derive the register index of the current-count
 * register relative to apicadr.  *pit_ticks_adj is set to the number of
 * PIT ticks by which the final PIT reading had gone past the target.
 * Interrupts are disabled for the duration of the measurement.
 */
static uint_t
apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
{
	uint8_t		pit_tick_lo;
	uint16_t	pit_tick, target_pit_tick;
	uint32_t	start_apic_tick, end_apic_tick;
	ulong_t		iflag;
	uint32_t	reg;

	/* index of the current-count register, as apic_read() expects it */
	reg = addr + APIC_CURR_COUNT - apicadr;

	iflag = intr_clear();

	/*
	 * Read the PIT counter (low byte, then high byte) until it is at
	 * least APIC_TIME_MIN and its low byte lies strictly between
	 * APIC_LB_MIN and APIC_LB_MAX.
	 */
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick < APIC_TIME_MIN ||
	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);

	/*
	 * Wait for the 8254 to decrement by 5 ticks to ensure
	 * we didn't start in the middle of a tick.
	 * Compare with 0x10 for the wrap around case.
	 */
	target_pit_tick = pit_tick - 5;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	start_apic_tick = apic_reg_ops->apic_read(reg);

	/*
	 * Wait for the 8254 to decrement by
	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
	 */
	target_pit_tick = pit_tick - APIC_TIME_COUNT;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	end_apic_tick = apic_reg_ops->apic_read(reg);

	/* how far the PIT had run past the target when the loop ended */
	*pit_ticks_adj = target_pit_tick - pit_tick;

	intr_restore(iflag);

	/* start minus end: the result is the drop in the count register */
	return (start_apic_tick - end_apic_tick);
}
   1552 
/*
 * Initialise the APIC timer on the local APIC of CPU 0 to the desired
 * frequency.  Note at this stage in the boot sequence, the boot processor
 * is the only active processor.
 * hertz value of 0 indicates a one-shot mode request.  In this case
 * the function returns the resolution (in nanoseconds) for the hardware
 * timer interrupt.  If one-shot mode capability is not available,
 * the return value will be 0. apic_enable_oneshot is a global switch
 * for disabling the functionality.
 * A non-zero positive value for hertz indicates a periodic mode request.
 * In this case the hardware will be programmed to generate clock interrupts
 * at hertz frequency and returns the resolution of interrupts in
 * nanosecond.
 */

static int
apic_clkinit(int hertz)
{
	uint_t		apic_ticks = 0;
	uint_t		pit_ticks;
	int		ret;
	uint16_t	pit_ticks_adj;
	/* calibration against the PIT is done on the first call only */
	static int	firsttime = 1;

	if (firsttime) {
		/* first time calibrate on CPU0 only */

		apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
		apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
		apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj);

		/* total number of PIT ticks corresponding to apic_ticks */
		pit_ticks = APIC_TIME_COUNT + pit_ticks_adj;

		/*
		 * Determine the number of nanoseconds per APIC clock tick
		 * and then determine how many APIC ticks to interrupt at the
		 * desired frequency
		 * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s
		 * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s
		 * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9)
		 * apic_ticks_per_SFns =
		 *   (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9)
		 */
		apic_ticks_per_SFnsecs =
		    ((SF * apic_ticks * PIT_HZ) /
		    ((uint64_t)pit_ticks * NANOSEC));

		/* the interval timer initial count is 32 bit max */
		apic_nsec_max = APIC_TICKS_TO_NSECS(APIC_MAXVAL);
		firsttime = 0;
	}

	if (hertz != 0) {
		/* periodic */
		apic_nsec_per_intr = NANOSEC / hertz;
		apic_hertz_count = APIC_NSECS_TO_TICKS(apic_nsec_per_intr);
	}

	/*
	 * Scale the redistribution thresholds by
	 * apic_sample_factor_redistribution percent.
	 * NOTE(review): this sits outside the firsttime guard, so the
	 * scaling is applied again on every call -- confirm intended.
	 */
	apic_int_busy_mark = (apic_int_busy_mark *
	    apic_sample_factor_redistribution) / 100;
	apic_int_free_mark = (apic_int_free_mark *
	    apic_sample_factor_redistribution) / 100;
	apic_diff_for_redistribution = (apic_diff_for_redistribution *
	    apic_sample_factor_redistribution) / 100;

	if (hertz == 0) {
		/* requested one_shot */
		if (!tsc_gethrtime_enable || !apic_oneshot_enable)
			return (0);
		apic_oneshot = 1;
		/* resolution: the nanoseconds represented by one APIC tick */
		ret = (int)APIC_TICKS_TO_NSECS(1);
	} else {
		/* program the local APIC to interrupt at the given frequency */
		apic_reg_ops->apic_write(APIC_INIT_COUNT, apic_hertz_count);
		apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME);
		apic_oneshot = 0;
		ret = NANOSEC / hertz;
	}

	return (ret);

}
   1637 
/*
 * apic_preshutdown:
 * Called early in shutdown whilst we can still access filesystems to do
 * things like loading modules which will be required to complete shutdown
 * after filesystems are all unmounted.
 *
 * As written, the routine only emits the verbose trace; nothing follows
 * the early return below, so no further work is done for any cmd/fcn.
 */
static void
apic_preshutdown(int cmd, int fcn)
{
	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));

	/* only a shutdown with power-off would be of interest here */
	if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) {
		return;
	}
}
   1654 
/*
 * Quiesce the apic hardware and, for a shutdown with power-off, try to
 * switch the machine off using the method named by apic_poweroff_method.
 *
 * In every case: the other cpus are sent an NMI (with
 * apic_shutdown_processors set, so that apic_nmi_intr() disables their
 * local apics), the cmos shutdown byte is restored if it had been set,
 * I/O apic redirection is disabled, the IMCR (if present) is switched
 * back to PIC mode and the local apic is disabled.
 *
 * cmd and fcn select how much further the routine goes; they are compared
 * against A_SHUTDOWN and the AD_* function codes below.
 */
static void
apic_shutdown(int cmd, int fcn)
{
	int restarts, attempts;
	int i;
	uchar_t	byte;
	ulong_t iflag;

	hpet_acpi_fini();

	/* Send NMI to all CPUs except self to do per processor shutdown */
	iflag = intr_clear();
#ifdef	DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC)
		APIC_AV_PENDING_SET();
#endif /* DEBUG */
	/* must be set before the NMI goes out; apic_nmi_intr() tests it */
	apic_shutdown_processors = 1;
	apic_reg_ops->apic_write(APIC_INT_CMD1,
	    AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);

	/* restore cmos shutdown byte before reboot */
	if (apic_cmos_ssb_set) {
		outb(CMOS_ADDR, SSB);
		outb(CMOS_DATA, 0);
	}

	ioapic_disable_redirection();

	/*	disable apic mode if imcr present	*/
	if (apic_imcrp) {
		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
	}

	apic_disable_local_apic();

	intr_restore(iflag);

	/* remainder of function is for shutdown cases only */
	if (cmd != A_SHUTDOWN)
		return;

	/*
	 * Switch system back into Legacy-Mode if using ACPI and
	 * not powering-off.  Some BIOSes need to remain in ACPI-mode
	 * for power-off to succeed (Dell Dimension 4600)
	 * Do not disable ACPI while doing fastreboot
	 */
	if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
		(void) AcpiDisable();

	/* fast reboot: assert INIT (reset) on every cpu except this one */
	if (fcn == AD_FASTREBOOT) {
		apic_reg_ops->apic_write(APIC_INT_CMD1,
		    AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
	}

	/* remainder of function is for shutdown+poweroff case only */
	if (fcn != AD_POWEROFF)
		return;

	switch (apic_poweroff_method) {
		case APIC_POWEROFF_VIA_RTC:

			/* select the extended NVRAM bank in the RTC */
			outb(CMOS_ADDR, RTC_REGA);
			byte = inb(CMOS_DATA);
			outb(CMOS_DATA, (byte | EXT_BANK));

			outb(CMOS_ADDR, PFR_REG);

			/* for Predator must toggle the PAB bit */
			byte = inb(CMOS_DATA);

			/*
			 * clear power active bar, wakeup alarm and
			 * kickstart
			 */
			byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
			outb(CMOS_DATA, byte);

			/* delay before next write */
			drv_usecwait(1000);

			/* for S40 the following would suffice */
			byte = inb(CMOS_DATA);

			/* power active bar control bit */
			byte |= PAB_CBIT;
			outb(CMOS_DATA, byte);

			break;

		case APIC_POWEROFF_VIA_ASPEN_BMC:
			/*
			 * Feed the aspen_bmc[] control/data pairs to the BMC.
			 * Before each write the busy flag is polled, waiting
			 * 1ms per poll; after too many busy polls the whole
			 * sequence is restarted, and the attempt is abandoned
			 * on the third restart.
			 */
			restarts = 0;
restart_aspen_bmc:
			if (++restarts == 3)
				break;
			attempts = 0;
			do {
				byte = inb(MISMIC_FLAG_REGISTER);
				byte &= MISMIC_BUSY_MASK;
				if (byte != 0) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_aspen_bmc;
					++attempts;
				}
			} while (byte != 0);
			outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
			byte = inb(MISMIC_FLAG_REGISTER);
			byte |= 0x1;
			outb(MISMIC_FLAG_REGISTER, byte);
			i = 0;
			for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
			    i++) {
				attempts = 0;
				do {
					byte = inb(MISMIC_FLAG_REGISTER);
					byte &= MISMIC_BUSY_MASK;
					if (byte != 0) {
						drv_usecwait(1000);
						if (attempts >= 3)
							goto restart_aspen_bmc;
						++attempts;
					}
				} while (byte != 0);
				outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
				outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
				byte = inb(MISMIC_FLAG_REGISTER);
				byte |= 0x1;
				outb(MISMIC_FLAG_REGISTER, byte);
			}
			break;

		case APIC_POWEROFF_VIA_SITKA_BMC:
			/*
			 * Same retry scheme as the aspen case, writing the
			 * sitka_bmc[] port/data pairs instead.
			 */
			restarts = 0;
restart_sitka_bmc:
			if (++restarts == 3)
				break;
			attempts = 0;
			do {
				byte = inb(SMS_STATUS_REGISTER);
				byte &= SMS_STATE_MASK;
				if ((byte == SMS_READ_STATE) ||
				    (byte == SMS_WRITE_STATE)) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_sitka_bmc;
					++attempts;
				}
			} while ((byte == SMS_READ_STATE) ||
			    (byte == SMS_WRITE_STATE));
			outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
			i = 0;
			for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
			    i++) {
				attempts = 0;
				do {
					byte = inb(SMS_STATUS_REGISTER);
					byte &= SMS_IBF_MASK;
					if (byte != 0) {
						drv_usecwait(1000);
						if (attempts >= 3)
							goto restart_sitka_bmc;
						++attempts;
					}
				} while (byte != 0);
				outb(sitka_bmc[i].port, sitka_bmc[i].data);
			}
			break;

		case APIC_POWEROFF_NONE:

			/* If no APIC direct method, we will try using ACPI */
			if (apic_enable_acpi) {
				if (acpi_poweroff() == 1)
					return;
			} else
				return;

			break;
	}
	/*
	 * Wait a limited time here for power to go off.
	 * If the power does not go off, then there was a
	 * problem and we should continue to the halt which
	 * prints a message for the user to press a key to
	 * reboot.
	 */
	drv_usecwait(7000000); /* wait seven seconds */

}
   1849 
   1850 /*
   1851  * Try and disable all interrupts. We just assign interrupts to other
   1852  * processors based on policy. If any were bound by user request, we
   1853  * let them continue and return failure. We do not bother to check
   1854  * for cache affinity while rebinding.
   1855  */
   1856 
   1857 static int
   1858 apic_disable_intr(processorid_t cpun)
   1859 {
   1860 	int bind_cpu = 0, i, hardbound = 0;
   1861 	apic_irq_t *irq_ptr;
   1862 	ulong_t iflag;
   1863 
   1864 	iflag = intr_clear();
   1865 	lock_set(&apic_ioapic_lock);
   1866 
   1867 	for (i = 0; i <= APIC_MAX_VECTOR; i++) {
   1868 		if (apic_reprogram_info[i].done == B_FALSE) {
   1869 			if (apic_reprogram_info[i].bindcpu == cpun) {
   1870 				/*
   1871 				 * CPU is busy -- it's the target of
   1872 				 * a pending reprogramming attempt
   1873 				 */
   1874 				lock_clear(&apic_ioapic_lock);
   1875 				intr_restore(iflag);
   1876 				return (PSM_FAILURE);
   1877 			}
   1878 		}
   1879 	}
   1880 
   1881 	apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
   1882 
   1883 	apic_cpus[cpun].aci_curipl = 0;
   1884 
   1885 	i = apic_min_device_irq;
   1886 	for (; i <= apic_max_device_irq; i++) {
   1887 		/*
   1888 		 * If there are bound interrupts on this cpu, then
   1889 		 * rebind them to other processors.
   1890 		 */
   1891 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
   1892 			ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) ||
   1893 			    (irq_ptr->airq_temp_cpu == IRQ_UNINIT) ||
   1894 			    ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) <
   1895 			    apic_nproc));
   1896 
   1897 			if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) {
   1898 				hardbound = 1;
   1899 				continue;
   1900 			}
   1901 
   1902 			if (irq_ptr->airq_temp_cpu == cpun) {
   1903 				do {
   1904 					bind_cpu = apic_next_bind_cpu++;
   1905 					if (bind_cpu >= apic_nproc) {
   1906 						apic_next_bind_cpu = 1;
   1907 						bind_cpu = 0;
   1908 
   1909 					}
   1910 				} while (apic_rebind_all(irq_ptr, bind_cpu));
   1911 			}
   1912 		}
   1913 	}
   1914 
   1915 	lock_clear(&apic_ioapic_lock);
   1916 	intr_restore(iflag);
   1917 
   1918 	if (hardbound) {
   1919 		cmn_err(CE_WARN, "Could not disable interrupts on %d"
   1920 		    "due to user bound interrupts", cpun);
   1921 		return (PSM_FAILURE);
   1922 	}
   1923 	else
   1924 		return (PSM_SUCCESS);
   1925 }
   1926 
   1927 /*
   1928  * Bind interrupts to the CPU's local APIC.
   1929  * Interrupts should not be bound to a CPU's local APIC until the CPU
   1930  * is ready to receive interrupts.
   1931  */
   1932 static void
   1933 apic_enable_intr(processorid_t cpun)
   1934 {
   1935 	int	i;
   1936 	apic_irq_t *irq_ptr;
   1937 	ulong_t iflag;
   1938 
   1939 	iflag = intr_clear();
   1940 	lock_set(&apic_ioapic_lock);
   1941 
   1942 	apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
   1943 
   1944 	i = apic_min_device_irq;
   1945 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
   1946 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
   1947 			if ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) == cpun) {
   1948 				(void) apic_rebind_all(irq_ptr,
   1949 				    irq_ptr->airq_cpu);
   1950 			}
   1951 		}
   1952 	}
   1953 
   1954 	lock_clear(&apic_ioapic_lock);
   1955 	intr_restore(iflag);
   1956 }
   1957 
   1958 
   1959 /*
   1960  * This function will reprogram the timer.
   1961  *
   1962  * When in oneshot mode the argument is the absolute time in future to
   1963  * generate the interrupt at.
   1964  *
   1965  * When in periodic mode, the argument is the interval at which the
   1966  * interrupts should be generated. There is no need to support the periodic
   1967  * mode timer change at this time.
   1968  */
   1969 static void
   1970 apic_timer_reprogram(hrtime_t time)
   1971 {
   1972 	hrtime_t now;
   1973 	uint_t ticks;
   1974 	int64_t delta;
   1975 
   1976 	/*
   1977 	 * We should be called from high PIL context (CBE_HIGH_PIL),
   1978 	 * so kpreempt is disabled.
   1979 	 */
   1980 
   1981 	if (!apic_oneshot) {
   1982 		/* time is the interval for periodic mode */
   1983 		ticks = APIC_NSECS_TO_TICKS(time);
   1984 	} else {
   1985 		/* one shot mode */
   1986 
   1987 		now = gethrtime();
   1988 		delta = time - now;
   1989 
   1990 		if (delta <= 0) {
   1991 			/*
   1992 			 * requested to generate an interrupt in the past
   1993 			 * generate an interrupt as soon as possible
   1994 			 */
   1995 			ticks = apic_min_timer_ticks;
   1996 		} else if (delta > apic_nsec_max) {
   1997 			/*
   1998 			 * requested to generate an interrupt at a time
   1999 			 * further than what we are capable of. Set to max
   2000 			 * the hardware can handle
   2001 			 */
   2002 
   2003 			ticks = APIC_MAXVAL;
   2004 #ifdef DEBUG
   2005 			cmn_err(CE_CONT, "apic_timer_reprogram, request at"
   2006 			    "  %lld  too far in future, current time"
   2007 			    "  %lld \n", time, now);
   2008 #endif
   2009 		} else
   2010 			ticks = APIC_NSECS_TO_TICKS(delta);
   2011 	}
   2012 
   2013 	if (ticks < apic_min_timer_ticks)
   2014 		ticks = apic_min_timer_ticks;
   2015 
   2016 	apic_reg_ops->apic_write(APIC_INIT_COUNT, ticks);
   2017 }
   2018 
   2019 /*
   2020  * This function will enable timer interrupts.
   2021  */
   2022 static void
   2023 apic_timer_enable(void)
   2024 {
   2025 	/*
   2026 	 * We should be Called from high PIL context (CBE_HIGH_PIL),
   2027 	 * so kpreempt is disabled.
   2028 	 */
   2029 
   2030 	if (!apic_oneshot) {
   2031 		apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
   2032 		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME);
   2033 	} else {
   2034 		/* one shot */
   2035 		apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
   2036 		    (apic_clkvect + APIC_BASE_VECT));
   2037 	}
   2038 }
   2039 
   2040 /*
   2041  * This function will disable timer interrupts.
   2042  */
   2043 static void
   2044 apic_timer_disable(void)
   2045 {
   2046 	/*
   2047 	 * We should be Called from high PIL context (CBE_HIGH_PIL),
   2048 	 * so kpreempt is disabled.
   2049 	 */
   2050 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
   2051 	    (apic_clkvect + APIC_BASE_VECT) | AV_MASK);
   2052 }
   2053 
   2054 /*
   2055  * Set timer far into the future and return timer
   2056  * current Count in nanoseconds.
   2057  */
   2058 hrtime_t
   2059 apic_timer_stop_count(void)
   2060 {
   2061 	hrtime_t	ns_val;
   2062 	int		enable_val, count_val;
   2063 
   2064 	/*
   2065 	 * Should be called with interrupts disabled.
   2066 	 */
   2067 	ASSERT(!interrupts_enabled());
   2068 
   2069 	enable_val = apic_reg_ops->apic_read(APIC_LOCAL_TIMER);
   2070 	if ((enable_val & AV_MASK) == AV_MASK)
   2071 		return ((hrtime_t)-1);		/* timer is disabled */
   2072 
   2073 	count_val = apic_reg_ops->apic_read(APIC_CURR_COUNT);
   2074 	ns_val = APIC_TICKS_TO_NSECS(count_val);
   2075 
   2076 	apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
   2077 
   2078 	return (ns_val);
   2079 }
   2080 
   2081 /*
   2082  * Reprogram timer after Deep C-State.
   2083  */
   2084 void
   2085 apic_timer_restart(hrtime_t time)
   2086 {
   2087 	apic_timer_reprogram(time);
   2088 }
   2089 
   2090 ddi_periodic_t apic_periodic_id;
   2091 
   2092 /*
   2093  * If this module needs a periodic handler for the interrupt distribution, it
   2094  * can be added here. The argument to the periodic handler is not currently
   2095  * used, but is reserved for future.
   2096  */
   2097 static void
   2098 apic_post_cyclic_setup(void *arg)
   2099 {
   2100 _NOTE(ARGUNUSED(arg))
   2101 	/* cpu_lock is held */
   2102 	/* set up a periodic handler for intr redistribution */
   2103 
   2104 	/*
   2105 	 * In peridoc mode intr redistribution processing is done in
   2106 	 * apic_intr_enter during clk intr processing
   2107 	 */
   2108 	if (!apic_oneshot)
   2109 		return;
   2110 	/*
   2111 	 * Register a periodical handler for the redistribution processing.
   2112 	 * On X86, CY_LOW_LEVEL is mapped to the level 2 interrupt, so
   2113 	 * DDI_IPL_2 should be passed to ddi_periodic_add() here.
   2114 	 */
   2115 	apic_periodic_id = ddi_periodic_add(
   2116 	    (void (*)(void *))apic_redistribute_compute, NULL,
   2117 	    apic_redistribute_sample_interval, DDI_IPL_2);
   2118 }
   2119 
   2120 static void
   2121 apic_redistribute_compute(void)
   2122 {
   2123 	int	i, j, max_busy;
   2124 
   2125 	if (apic_enable_dynamic_migration) {
   2126 		if (++apic_nticks == apic_sample_factor_redistribution) {
   2127 			/*
   2128 			 * Time to call apic_intr_redistribute().
   2129 			 * reset apic_nticks. This will cause max_busy
   2130 			 * to be calculated below and if it is more than
   2131 			 * apic_int_busy, we will do the whole thing
   2132 			 */
   2133 			apic_nticks = 0;
   2134 		}
   2135 		max_busy = 0;
   2136 		for (i = 0; i < apic_nproc; i++) {
   2137 
   2138 			/*
   2139 			 * Check if curipl is non zero & if ISR is in
   2140 			 * progress
   2141 			 */
   2142 			if (((j = apic_cpus[i].aci_curipl) != 0) &&
   2143 			    (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
   2144 
   2145 				int	irq;
   2146 				apic_cpus[i].aci_busy++;
   2147 				irq = apic_cpus[i].aci_current[j];
   2148 				apic_irq_table[irq]->airq_busy++;
   2149 			}
   2150 
   2151 			if (!apic_nticks &&
   2152 			    (apic_cpus[i].aci_busy > max_busy))
   2153 				max_busy = apic_cpus[i].aci_busy;
   2154 		}
   2155 		if (!apic_nticks) {
   2156 			if (max_busy > apic_int_busy_mark) {
   2157 			/*
   2158 			 * We could make the following check be
   2159 			 * skipped > 1 in which case, we get a
   2160 			 * redistribution at half the busy mark (due to
   2161 			 * double interval). Need to be able to collect
   2162 			 * more empirical data to decide if that is a
   2163 			 * good strategy. Punt for now.
   2164 			 */
   2165 				if (apic_skipped_redistribute) {
   2166 					apic_cleanup_busy();
   2167 					apic_skipped_redistribute = 0;
   2168 				} else {
   2169 					apic_intr_redistribute();
   2170 				}
   2171 			} else
   2172 				apic_skipped_redistribute++;
   2173 		}
   2174 	}
   2175 }
   2176 
   2177 
   2178 /*
   2179  * The following functions are in the platform specific file so that they
   2180  * can be different functions depending on whether we are running on
   2181  * bare metal or a hypervisor.
   2182  */
   2183 
   2184 /*
   2185  * map an apic for memory-mapped access
   2186  */
   2187 uint32_t *
   2188 mapin_apic(uint32_t addr, size_t len, int flags)
   2189 {
   2190 	/*LINTED: pointer cast may result in improper alignment */
   2191 	return ((uint32_t *)psm_map_phys(addr, len, flags));
   2192 }
   2193 
   2194 uint32_t *
   2195 mapin_ioapic(uint32_t addr, size_t len, int flags)
   2196 {
   2197 	return (mapin_apic(addr, len, flags));
   2198 }
   2199 
   2200 /*
   2201  * unmap an apic
   2202  */
   2203 void
   2204 mapout_apic(caddr_t addr, size_t len)
   2205 {
   2206 	psm_unmap_phys(addr, len);
   2207 }
   2208 
   2209 void
   2210 mapout_ioapic(caddr_t addr, size_t len)
   2211 {
   2212 	mapout_apic(addr, len);
   2213 }
   2214 
   2215 /*
   2216  * Check to make sure there are enough irq slots
   2217  */
   2218 int
   2219 apic_check_free_irqs(int count)
   2220 {
   2221 	int i, avail;
   2222 
   2223 	avail = 0;
   2224 	for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) {
   2225 		if ((apic_irq_table[i] == NULL) ||
   2226 		    apic_irq_table[i]->airq_mps_intr_index == FREE_INDEX) {
   2227 			if (++avail >= count)
   2228 				return (PSM_SUCCESS);
   2229 		}
   2230 	}
   2231 	return (PSM_FAILURE);
   2232 }
   2233 
   2234 /*
   2235  * This function allocates "count" MSI vector(s) for the given "dip/pri/type"
   2236  */
   2237 int
   2238 apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri,
   2239     int behavior)
   2240 {
   2241 	int	rcount, i;
   2242 	uchar_t	start, irqno;
   2243 	uint32_t cpu;
   2244 	major_t	major;
   2245 	apic_irq_t	*irqptr;
   2246 
   2247 	DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: dip=0x%p "
   2248 	    "inum=0x%x  pri=0x%x count=0x%x behavior=%d\n",
   2249 	    (void *)dip, inum, pri, count, behavior));
   2250 
   2251 	if (count > 1) {
   2252 		if (behavior == DDI_INTR_ALLOC_STRICT &&
   2253 		    apic_multi_msi_enable == 0)
   2254 			return (0);
   2255 		if (apic_multi_msi_enable == 0)
   2256 			count = 1;
   2257 	}
   2258 
   2259 	if ((rcount = apic_navail_vector(dip, pri)) > count)
   2260 		rcount = count;
   2261 	else if (rcount == 0 || (rcount < count &&
   2262 	    behavior == DDI_INTR_ALLOC_STRICT))
   2263 		return (0);
   2264 
   2265 	/* if not ISP2, then round it down */
   2266 	if (!ISP2(rcount))
   2267 		rcount = 1 << (highbit(rcount) - 1);
   2268 
   2269 	mutex_enter(&airq_mutex);
   2270 
   2271 	for (start = 0; rcount > 0; rcount >>= 1) {
   2272 		if ((start = apic_find_multi_vectors(pri, rcount)) != 0 ||
   2273 		    behavior == DDI_INTR_ALLOC_STRICT)
   2274 			break;
   2275 	}
   2276 
   2277 	if (start == 0) {
   2278 		/* no vector available */
   2279 		mutex_exit(&airq_mutex);
   2280 		return (0);
   2281 	}
   2282 
   2283 	if (apic_check_free_irqs(rcount) == PSM_FAILURE) {
   2284 		/* not enough free irq slots available */
   2285 		mutex_exit(&airq_mutex);
   2286 		return (0);
   2287 	}
   2288 
   2289 	major = (dip != NULL) ? ddi_driver_major(dip) : 0;
   2290 	for (i = 0; i < rcount; i++) {
   2291 		if ((irqno = apic_allocate_irq(apic_first_avail_irq)) ==
   2292 		    (uchar_t)-1) {
   2293 			/*
   2294 			 * shouldn't happen because of the
   2295 			 * apic_check_free_irqs() check earlier
   2296 			 */
   2297 			mutex_exit(&airq_mutex);
   2298 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
   2299 			    "apic_allocate_irq failed\n"));
   2300 			return (i);
   2301 		}
   2302 		apic_max_device_irq = max(irqno, apic_max_device_irq);
   2303 		apic_min_device_irq = min(irqno, apic_min_device_irq);
   2304 		irqptr = apic_irq_table[irqno];
   2305 #ifdef	DEBUG
   2306 		if (apic_vector_to_irq[start + i] != APIC_RESV_IRQ)
   2307 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
   2308 			    "apic_vector_to_irq is not APIC_RESV_IRQ\n"));
   2309 #endif
   2310 		apic_vector_to_irq[start + i] = (uchar_t)irqno;
   2311 
   2312 		irqptr->airq_vector = (uchar_t)(start + i);
   2313 		irqptr->airq_ioapicindex = (uchar_t)inum;	/* start */
   2314 		irqptr->airq_intin_no = (uchar_t)rcount;
   2315 		irqptr->airq_ipl = pri;
   2316 		irqptr->airq_vector = start + i;
   2317 		irqptr->airq_origirq = (uchar_t)(inum + i);
   2318 		irqptr->airq_share_id = 0;
   2319 		irqptr->airq_mps_intr_index = MSI_INDEX;
   2320 		irqptr->airq_dip = dip;
   2321 		irqptr->airq_major = major;
   2322 		if (i == 0) /* they all bound to the same cpu */
   2323 			cpu = irqptr->airq_cpu = apic_bind_intr(dip, irqno,
   2324 			    0xff, 0xff);
   2325 		else
   2326 			irqptr->airq_cpu = cpu;
   2327 		DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: irq=0x%x "
   2328 		    "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno,
   2329 		    (void *)irqptr->airq_dip, irqptr->airq_vector,
   2330 		    irqptr->airq_origirq, pri));
   2331 	}
   2332 	mutex_exit(&airq_mutex);
   2333 	return (rcount);
   2334 }
   2335 
   2336 /*
   2337  * This function allocates "count" MSI-X vector(s) for the given "dip/pri/type"
   2338  */
   2339 int
   2340 apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri,
   2341     int behavior)
   2342 {
   2343 	int	rcount, i;
   2344 	major_t	major;
   2345 
   2346 	mutex_enter(&airq_mutex);
   2347 
   2348 	if ((rcount = apic_navail_vector(dip, pri)) > count)
   2349 		rcount = count;
   2350 	else if (rcount == 0 || (rcount < count &&
   2351 	    behavior == DDI_INTR_ALLOC_STRICT)) {
   2352 		rcount = 0;
   2353 		goto out;
   2354 	}
   2355 
   2356 	if (apic_check_free_irqs(rcount) == PSM_FAILURE) {
   2357 		/* not enough free irq slots available */
   2358 		rcount = 0;
   2359 		goto out;
   2360 	}
   2361 
   2362 	major = (dip != NULL) ? ddi_driver_major(dip) : 0;
   2363 	for (i = 0; i < rcount; i++) {
   2364 		uchar_t	vector, irqno;
   2365 		apic_irq_t	*irqptr;
   2366 
   2367 		if ((irqno = apic_allocate_irq(apic_first_avail_irq)) ==
   2368 		    (uchar_t)-1) {
   2369 			/*
   2370 			 * shouldn't happen because of the
   2371 			 * apic_check_free_irqs() check earlier
   2372 			 */
   2373 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msix_vectors: "
   2374 			    "apic_allocate_irq failed\n"));
   2375 			rcount = i;
   2376 			goto out;
   2377 		}
   2378 		if ((vector = apic_allocate_vector(pri, irqno, 1)) == 0) {
   2379 			/*
   2380 			 * shouldn't happen because of the
   2381 			 * apic_navail_vector() call earlier
   2382 			 */
   2383 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msix_vectors: "
   2384 			    "apic_allocate_vector failed\n"));
   2385 			rcount = i;
   2386 			goto out;
   2387 		}
   2388 		apic_max_device_irq = max(irqno, apic_max_device_irq);
   2389 		apic_min_device_irq = min(irqno, apic_min_device_irq);
   2390 		irqptr = apic_irq_table[irqno];
   2391 		irqptr->airq_vector = (uchar_t)vector;
   2392 		irqptr->airq_ipl = pri;
   2393 		irqptr->airq_origirq = (uchar_t)(inum + i);
   2394 		irqptr->airq_share_id = 0;
   2395 		irqptr->airq_mps_intr_index = MSIX_INDEX;
   2396 		irqptr->airq_dip = dip;
   2397 		irqptr->airq_major = major;
   2398 		irqptr->airq_cpu = apic_bind_intr(dip, irqno, 0xff, 0xff);
   2399 	}
   2400 out:
   2401 	mutex_exit(&airq_mutex);
   2402 	return (rcount);
   2403 }
   2404 
   2405 /*
   2406  * Allocate a free vector for irq at ipl. Takes care of merging of multiple
   2407  * IPLs into a single APIC level as well as stretching some IPLs onto multiple
   2408  * levels. APIC_HI_PRI_VECTS interrupts are reserved for high priority
   2409  * requests and allocated only when pri is set.
   2410  */
   2411 uchar_t
   2412 apic_allocate_vector(int ipl, int irq, int pri)
   2413 {
   2414 	int	lowest, highest, i;
   2415 
   2416 	highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK;
   2417 	lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL;
   2418 
   2419 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
   2420 		lowest -= APIC_VECTOR_PER_IPL;
   2421 
   2422 #ifdef	DEBUG
   2423 	if (apic_restrict_vector)	/* for testing shared interrupt logic */
   2424 		highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS;
   2425 #endif /* DEBUG */
   2426 	if (pri == 0)
   2427 		highest -= APIC_HI_PRI_VECTS;
   2428 
   2429 	for (i = lowest; i < highest; i++) {
   2430 		if (APIC_CHECK_RESERVE_VECTORS(i))
   2431 			continue;
   2432 		if (apic_vector_to_irq[i] == APIC_RESV_IRQ) {
   2433 			apic_vector_to_irq[i] = (uchar_t)irq;
   2434 			return (i);
   2435 		}
   2436 	}
   2437 
   2438 	return (0);
   2439 }
   2440 
   2441 /* Mark vector as not being used by any irq */
   2442 void
   2443 apic_free_vector(uchar_t vector)
   2444 {
   2445 	apic_vector_to_irq[vector] = APIC_RESV_IRQ;
   2446 }
   2447 
   2448 uint32_t
   2449 ioapic_read(int ioapic_ix, uint32_t reg)
   2450 {
   2451 	volatile uint32_t *ioapic;
   2452 
   2453 	ioapic = apicioadr[ioapic_ix];
   2454 	ioapic[APIC_IO_REG] = reg;
   2455 	return (ioapic[APIC_IO_DATA]);
   2456 }
   2457 
   2458 void
   2459 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
   2460 {
   2461 	volatile uint32_t *ioapic;
   2462 
   2463 	ioapic = apicioadr[ioapic_ix];
   2464 	ioapic[APIC_IO_REG] = reg;
   2465 	ioapic[APIC_IO_DATA] = value;
   2466 }
   2467 
   2468 void
   2469 ioapic_write_eoi(int ioapic_ix, uint32_t value)
   2470 {
   2471 	volatile uint32_t *ioapic;
   2472 
   2473 	ioapic = apicioadr[ioapic_ix];
   2474 	ioapic[APIC_IO_EOI] = value;
   2475 }
   2476 
   2477 static processorid_t
   2478 apic_find_cpu(int flag)
   2479 {
   2480 	processorid_t acid = 0;
   2481 	int i;
   2482 
   2483 	/* Find the first CPU with the passed-in flag set */
   2484 	for (i = 0; i < apic_nproc; i++) {
   2485 		if (apic_cpus[i].aci_status & flag) {
   2486 			acid = i;
   2487 			break;
   2488 		}
   2489 	}
   2490 
   2491 	ASSERT((apic_cpus[acid].aci_status & flag) != 0);
   2492 	return (acid);
   2493 }
   2494 
   2495 /*
   2496  * Call rebind to do the actual programming.
   2497  * Must be called with interrupts disabled and apic_ioapic_lock held
   2498  * 'p' is polymorphic -- if this function is called to process a deferred
   2499  * reprogramming, p is of type 'struct ioapic_reprogram_data *', from which
   2500  * the irq pointer is retrieved.  If not doing deferred reprogramming,
   2501  * p is of the type 'apic_irq_t *'.
   2502  *
   2503  * apic_ioapic_lock must be held across this call, as it protects apic_rebind
   2504  * and it protects apic_find_cpu() from a race in which a CPU can be taken
   2505  * offline after a cpu is selected, but before apic_rebind is called to
   2506  * bind interrupts to it.
   2507  */
   2508 int
   2509 apic_setup_io_intr(void *p, int irq, boolean_t deferred)
   2510 {
   2511 	apic_irq_t *irqptr;
   2512 	struct ioapic_reprogram_data *drep = NULL;
   2513 	int rv;
   2514 
   2515 	if (deferred) {
   2516 		drep = (struct ioapic_reprogram_data *)p;
   2517 		ASSERT(drep != NULL);
   2518 		irqptr = drep->irqp;
   2519 	} else
   2520 		irqptr = (apic_irq_t *)p;
   2521 
   2522 	ASSERT(irqptr != NULL);
   2523 
   2524 	rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, drep);
   2525 	if (rv) {
   2526 		/*
   2527 		 * CPU is not up or interrupts are disabled. Fall back to
   2528 		 * the first available CPU
   2529 		 */
   2530 		rv = apic_rebind(irqptr, apic_find_cpu(APIC_CPU_INTR_ENABLE),
   2531 		    drep);
   2532 	}
   2533 
   2534 	return (rv);
   2535 }
   2536 
   2537 
   2538 uchar_t
   2539 apic_modify_vector(uchar_t vector, int irq)
   2540 {
   2541 	apic_vector_to_irq[vector] = (uchar_t)irq;
   2542 	return (vector);
   2543 }
   2544 
   2545 char *
   2546 apic_get_apic_type()
   2547 {
   2548 	return (apic_psm_info.p_mach_idstring);
   2549 }
   2550 
   2551 void
   2552 x2apic_update_psm()
   2553 {
   2554 	struct psm_ops *pops = &apic_ops;
   2555 
   2556 	ASSERT(pops != NULL);
   2557 
   2558 	/*
   2559 	 * We don't need to do any magic if one of the following
   2560 	 * conditions is true :
   2561 	 * - Not being run under kernel debugger.
   2562 	 * - MP is not set.
   2563 	 * - Booted with one CPU only.
   2564 	 * - One CPU configured.
   2565 	 *
   2566 	 * We set apic_common_send_ipi() since kernel debuggers
   2567 	 * attempt to send IPIs to other slave CPUs during
   2568 	 * entry (exit) from (to) debugger.
   2569 	 */
   2570 	if (!(boothowto & RB_DEBUG) || use_mp == 0 ||
   2571 	    apic_nproc == 1 || boot_ncpus == 1) {
   2572 		pops->psm_send_ipi =  x2apic_send_ipi;
   2573 	} else {
   2574 		pops->psm_send_ipi =  apic_common_send_ipi;
   2575 	}
   2576 
   2577 	pops->psm_intr_exit = x2apic_intr_exit;
   2578 	pops->psm_setspl = x2apic_setspl;
   2579 
   2580 	send_dirintf = pops->psm_send_ipi;
   2581 
   2582 	apic_mode = LOCAL_X2APIC;
   2583 	apic_change_ops();
   2584 }
   2585 
   2586 static void
   2587 apic_intrr_init(int apic_mode)
   2588 {
   2589 	int suppress_brdcst_eoi = 0;
   2590 
   2591 	if (psm_vt_ops != NULL) {
   2592 		if (((apic_intrr_ops_t *)psm_vt_ops)->apic_intrr_init(apic_mode)
   2593 		    == DDI_SUCCESS) {
   2594 			apic_vt_ops = psm_vt_ops;
   2595 
   2596 			/*
   2597 			 * We leverage the interrupt remapping engine to
   2598 			 * suppress broadcast EOI; thus we must send the
   2599 			 * directed EOI with the directed-EOI handler.
   2600 			 */
   2601 			if (apic_directed_EOI_supported() == 0) {
   2602 				suppress_brdcst_eoi = 1;
   2603 				apic_set_directed_EOI_handler();
   2604 			}
   2605 
   2606 			apic_vt_ops->apic_intrr_enable(suppress_brdcst_eoi);
   2607 		}
   2608 	}
   2609 }
   2610 
   2611 /*ARGSUSED*/
   2612 static void
   2613 apic_record_ioapic_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt)
   2614 {
   2615 	irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
   2616 }
   2617 
   2618 /*ARGSUSED*/
   2619 static void
   2620 apic_record_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs)
   2621 {
   2622 	mregs->mr_addr = MSI_ADDR_HDR |
   2623 	    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
   2624 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
   2625 	    (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
   2626 	mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
   2627 	    mregs->mr_data;
   2628 }
   2629