Home | History | Annotate | Download | only in pcplusmp
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
     29  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
     30  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
     31  * PSMI 1.5 extensions are supported in Solaris Nevada.
     32  * PSMI 1.6 extensions are supported in Solaris Nevada.
     33  */
     34 #define	PSMI_1_6
     35 
     36 #include <sys/processor.h>
     37 #include <sys/time.h>
     38 #include <sys/psm.h>
     39 #include <sys/smp_impldefs.h>
     40 #include <sys/cram.h>
     41 #include <sys/acpi/acpi.h>
     42 #include <sys/acpica.h>
     43 #include <sys/psm_common.h>
     44 #include <sys/apic.h>
     45 #include <sys/pit.h>
     46 #include <sys/ddi.h>
     47 #include <sys/sunddi.h>
     48 #include <sys/ddi_impldefs.h>
     49 #include <sys/pci.h>
     50 #include <sys/promif.h>
     51 #include <sys/x86_archext.h>
     52 #include <sys/cpc_impl.h>
     53 #include <sys/uadmin.h>
     54 #include <sys/panic.h>
     55 #include <sys/debug.h>
     56 #include <sys/archsystm.h>
     57 #include <sys/trap.h>
     58 #include <sys/machsystm.h>
     59 #include <sys/sysmacros.h>
     60 #include <sys/cpuvar.h>
     61 #include <sys/rm_platter.h>
     62 #include <sys/privregs.h>
     63 #include <sys/note.h>
     64 #include <sys/pci_intr_lib.h>
     65 #include <sys/spl.h>
     66 #include <sys/clock.h>
     67 #include <sys/dditypes.h>
     68 #include <sys/sunddi.h>
     69 #include <sys/x_call.h>
     70 #include <sys/reboot.h>
     71 #include <sys/hpet.h>
     72 
     73 /*
     74  *	Local Function Prototypes
     75  */
     76 static void apic_init_intr();
     77 static void apic_nmi_intr(caddr_t arg, struct regs *rp);
     78 
     79 /*
     80  *	standard MP entries
     81  */
     82 static int	apic_probe();
     83 static int	apic_clkinit();
     84 static int	apic_getclkirq(int ipl);
     85 static uint_t	apic_calibrate(volatile uint32_t *addr,
     86     uint16_t *pit_ticks_adj);
     87 static hrtime_t apic_gettime();
     88 static hrtime_t apic_gethrtime();
     89 static void	apic_init();
     90 static void	apic_picinit(void);
     91 static int	apic_cpu_start(processorid_t, caddr_t);
     92 static int	apic_post_cpu_start(void);
     93 static void	apic_send_ipi(int cpun, int ipl);
     94 static void	apic_set_idlecpu(processorid_t cpun);
     95 static void	apic_unset_idlecpu(processorid_t cpun);
     96 static int	apic_intr_enter(int ipl, int *vect);
     97 static void	apic_setspl(int ipl);
     98 static void	x2apic_setspl(int ipl);
     99 static int	apic_addspl(int ipl, int vector, int min_ipl, int max_ipl);
    100 static int	apic_delspl(int ipl, int vector, int min_ipl, int max_ipl);
    101 static void	apic_shutdown(int cmd, int fcn);
    102 static void	apic_preshutdown(int cmd, int fcn);
    103 static int	apic_disable_intr(processorid_t cpun);
    104 static void	apic_enable_intr(processorid_t cpun);
    105 static processorid_t	apic_get_next_processorid(processorid_t cpun);
    106 static int		apic_get_ipivect(int ipl, int type);
    107 static void	apic_timer_reprogram(hrtime_t time);
    108 static void	apic_timer_enable(void);
    109 static void	apic_timer_disable(void);
    110 static void	apic_post_cyclic_setup(void *arg);
    111 static void	apic_intrmap_init(int apic_mode);
    112 static void	apic_record_ioapic_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt);
    113 static void	apic_record_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs);
    114 
    115 static int	apic_oneshot = 0;	/* nonzero: apic_intr_enter() skips its per-tick hrtime update */
    116 int	apic_oneshot_enable = 1; /* to allow disabling one-shot capability */
    117 
    118 /* Now the ones for Dynamic Interrupt distribution */
    119 int	apic_enable_dynamic_migration = 0;
    120 
        /* set to 1 by apic_init() on non-amd64 builds when cpuid_have_cr8access(CPU) is true */
    121 extern int apic_have_32bit_cr8;
    122 
    123 /*
    124  * These variables are frequently accessed in apic_intr_enter(),
    125  * apic_intr_exit and apic_setspl, so group them together
    126  */
    127 volatile uint32_t *apicadr =  NULL;	/* virtual addr of local APIC	*/
    128 int apic_setspl_delay = 1;		/* apic_setspl - delay enable	*/
    129 int apic_clkvect;	/* apic_intr_enter() treats a matching vector as the clock */
    130 
    131 /* vector at which error interrupts come in */
    132 int apic_errvect;	/* 0 until apic_init_intr() allocates it */
    133 int apic_enable_error_intr = 1;	/* 0: apic_init_intr() leaves the error LVT entry alone */
        /*
         * Number of tenmicrosec() calls apic_error_intr() makes after printing
         * a non-checksum error; doubled after each such report while below 500.
         */
    134 int apic_error_display_delay = 100;
    135 
    136 /* vector at which performance counter overflow interrupts come in */
    137 int apic_cpcovf_vect;	/* 0 until apic_init_intr() allocates it */
    138 int apic_enable_cpcovf_intr = 1;	/* cleared by apic_init_intr() if X86_MSR is not set */
    139 
    140 /* vector at which CMCI interrupts come in */
    141 int apic_cmci_vect;	/* 0 until apic_init_intr() allocates it */
    142 extern int cmi_enable_cmci;
    143 extern void cmi_cmci_trap(void);
    144 
    145 static kmutex_t cmci_cpu_setup_lock;	/* protects cmci_cpu_setup_registered */
    146 static int cmci_cpu_setup_registered;	/* set once cmci_cpu_setup() has been registered */
    147 
    148 /*
    149  * The following vector assignments influence the value of ipltopri and
    150  * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program
    151  * idle to 0 and IPL 0 to 0xf to differentiate idle in case
    152  * we care to do so in future. Note some IPLs which are rarely used
    153  * will share the vector ranges and heavily used IPLs (5 and 6) have
    154  * a wide range.
    155  *
    156  * This array is used to initialize apic_ipls[] (in apic_init()).
    157  *
    158  *	IPL		Vector range.		as passed to intr_enter
    159  *	0		none.
    160  *	1,2,3		0x20-0x2f		0x0-0xf
    161  *	4		0x30-0x3f		0x10-0x1f
    162  *	5		0x40-0x5f		0x20-0x3f
    163  *	6		0x60-0x7f		0x40-0x5f
    164  *	7,8,9		0x80-0x8f		0x60-0x6f
    165  *	10		0x90-0x9f		0x70-0x7f
    166  *	11		0xa0-0xaf		0x80-0x8f
    167  *	...		...
    168  *	15		0xe0-0xef		0xc0-0xcf
    169  *	15		0xf0-0xff		0xd0-0xdf
    170  */
    171 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
    172 	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15
    173 };
    174 	/*
    175 	 * The ipl of an ISR at vector X is apic_vectortoipl[X>>4]
    176 	 * NOTE that this is vector as passed into intr_enter which is
    177 	 * programmed vector - 0x20 (APIC_BASE_VECT)
    178 	 */
    179 
        /* Filled in by apic_init() from apic_vectortoipl[]. */
    180 uchar_t	apic_ipltopri[MAXIPL + 1];	/* unix ipl to apic pri	*/
    181 	/* The taskpri to be programmed into apic to mask given ipl */
    182 
    183 #if defined(__amd64)
        /* apic_init() stores apic_ipltopri[i] >> APIC_IPL_SHIFT here and points CPU->cpu_pri_data at it. */
    184 uchar_t	apic_cr8pri[MAXIPL + 1];	/* unix ipl to cr8 pri	*/
    185 #endif
    186 
    187 /*
    188  * Correlation of the hardware vector to the IPL in use, initialized
    189  * from apic_vectortoipl[] in apic_init().  The final IPLs may not correlate
    190  * to the IPLs in apic_vectortoipl on some systems that share interrupt lines
    191  * connected to errata-stricken IOAPICs
    192  */
    193 uchar_t apic_ipls[APIC_AVAIL_VECTOR];
    194 
    195 /*
    196  * Patchable global variables.
    197  */
    198 int	apic_forceload = 0;
    199 
        /* When nonzero, _init() installs apic_gettime() as apic_ops.psm_gethrtime. */
    200 int	apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */
    201 					/* 1 - use gettime() for performance */
    202 int	apic_flat_model = 0;		/* 0 - clustered. 1 - flat */
    203 int	apic_enable_hwsoftint = 0;	/* 0 - disable, 1 - enable	*/
    204 int	apic_enable_bind_log = 1;	/* 1 - display interrupt binding log */
    205 int	apic_panic_on_nmi = 0;
    206 int	apic_panic_on_apic_error = 0;	/* nonzero: apic_error_intr() panics via cmn_err(CE_PANIC) */
    207 
    208 int	apic_verbose = 0;
    209 
    210 /* minimum number of timer ticks to program to */
    211 int apic_min_timer_ticks = 1;
    212 /*
    213  *	Local static data
    214  */
        /*
         * PSM operations vector handed to the PSM framework through
         * apic_psm_info.  The initializer is positional, so entries must stay
         * in the order of the struct psm_ops members.  _init() overwrites the
         * psm_gethrtime member with apic_gettime when apic_coarse_hrtime is
         * nonzero.
         */
    215 static struct	psm_ops apic_ops = {
    216 	apic_probe,
    217 
    218 	apic_init,
    219 	apic_picinit,
    220 	apic_intr_enter,
    221 	apic_intr_exit,
    222 	apic_setspl,
    223 	apic_addspl,
    224 	apic_delspl,
    225 	apic_disable_intr,
    226 	apic_enable_intr,
    227 	(int (*)(int))NULL,		/* psm_softlvl_to_irq */
    228 	(void (*)(int))NULL,		/* psm_set_softintr */
    229 
    230 	apic_set_idlecpu,
    231 	apic_unset_idlecpu,
    232 
    233 	apic_clkinit,
    234 	apic_getclkirq,
    235 	(void (*)(void))NULL,		/* psm_hrtimeinit */
    236 	apic_gethrtime,
    237 
    238 	apic_get_next_processorid,
    239 	apic_cpu_start,
    240 	apic_post_cpu_start,
    241 	apic_shutdown,
    242 	apic_get_ipivect,
    243 	apic_send_ipi,
    244 
    245 	(int (*)(dev_info_t *, int))NULL,	/* psm_translate_irq */
    246 	(void (*)(int, char *))NULL,	/* psm_notify_error */
    247 	(void (*)(int))NULL,		/* psm_notify_func */
    248 	apic_timer_reprogram,
    249 	apic_timer_enable,
    250 	apic_timer_disable,
    251 	apic_post_cyclic_setup,
    252 	apic_preshutdown,
    253 	apic_intr_ops,			/* Advanced DDI Interrupt framework */
    254 	apic_state,			/* save, restore apic state for S3 */
    255 };
    256 
    257 
    258 static struct	psm_info apic_psm_info = {	/* passed to psm_mod_init/fini/info below */
    259 	PSM_INFO_VER01_6,			/* version */
    260 	PSM_OWN_EXCLUSIVE,			/* ownership */
    261 	(struct psm_ops *)&apic_ops,		/* operation */
    262 	APIC_PCPLUSMP_NAME,			/* machine name */
        	/* NOTE(review): description says v1.4 while version is PSM_INFO_VER01_6 - confirm intent */
    263 	"pcplusmp v1.4 compatible",
    264 };
    265 
    266 static void *apic_hdlp;	/* handle passed by address to psm_mod_init/fini/info */
    267 
    268 #ifdef DEBUG
    269 int	apic_debug = 0;	/* nonzero: apic_error_intr() calls debug_enter() on an error */
    270 int	apic_restrict_vector = 0;
    271 
    272 int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
    273 int	apic_debug_msgbufindex = 0;
    274 
    275 #endif /* DEBUG */
    276 
        /* Indexed by processor id; supplies aci_local_id and aci_local_ver below. */
    277 apic_cpus_info_t	*apic_cpus;
    278 
    279 cpuset_t	apic_cpumask;
    280 uint_t	apic_picinit_called;	/* set to 1 by apic_picinit() */
    281 
    282 /* Flag to indicate that we need to shut down all processors */
    283 static uint_t	apic_shutdown_processors;
    284 
        /* Added to apic_nsec_since_boot on each periodic clock interrupt in apic_intr_enter(). */
    285 uint_t apic_nsec_per_intr = 0;
    286 
    287 /*
    288  * apic_let_idle_redistribute can have the following values:
    289  * 0 - If clock decremented it from 1 to 0, clock has to call redistribute.
    290  * apic_redistribute_lock prevents multiple idle cpus from redistributing
    291  */
        /* NOTE(review): only the value 0 is described above; other values are not documented here. */
    292 int	apic_num_idle_redistributions = 0;
    293 static	int apic_let_idle_redistribute = 0;
    294 static	uint_t apic_nticks = 0;
    295 static	uint_t apic_skipped_redistribute = 0;
    296 
    297 /* to gather intr data and redistribute */
    298 static void apic_redistribute_compute(void);
    299 
        /* Reset to apic_hertz_count on each periodic clock interrupt in apic_intr_enter(). */
    300 static	uint_t last_count_read = 0;
    301 static	lock_t	apic_gethrtime_lock;
        /* Incremented once before and once after apic_nsec_since_boot is updated in apic_intr_enter(). */
    302 volatile int	apic_hrtime_stamp = 0;
    303 volatile hrtime_t apic_nsec_since_boot = 0;
    304 static uint_t apic_hertz_count;
    305 
    306 uint64_t apic_ticks_per_SFnsecs;	/* # of ticks in SF nsecs */
    307 
    308 static hrtime_t apic_nsec_max;
    309 
    310 static	hrtime_t	apic_last_hrtime = 0;
    311 int		apic_hrtime_error = 0;
    312 int		apic_remote_hrterr = 0;
    313 int		apic_num_nmis = 0;
    314 int		apic_apic_error = 0;	/* OR of error status values seen by apic_error_intr() */
    315 int		apic_num_apic_errors = 0;	/* errors recorded by apic_error_intr() */
    316 int		apic_num_cksum_errors = 0;	/* subset with only APIC_CS_ERRORS bits set */
    317 
    318 int	apic_error = 0;	/* accumulates APIC_ERR_* flags (APIC_ERR_APIC_ERROR, APIC_ERR_BOOT_EOI) */
    319 static	int	apic_cmos_ssb_set = 0;	/* set to 1 by apic_cpu_start() */
    320 
    321 /* use to make sure only one cpu handles the nmi */
    322 static	lock_t	apic_nmi_lock;
    323 /* use to make sure only one cpu handles the error interrupt */
    324 static	lock_t	apic_error_lock;
    325 
    326 static	struct {	/* (control, data) byte pairs; consumer is not visible in this view */
    327 	uchar_t	cntl;
    328 	uchar_t	data;
    329 } aspen_bmc[] = {
        	/* first message, per the entry comments: SET_WATCHDOG_TIMER */
    330 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
    331 	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
    332 	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
    333 	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
    334 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
    335 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
    336 	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
    337 	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */
    338 
        	/* second message, per the entry comments: RESET_WATCHDOG_TIMER */
    339 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
    340 	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
    341 };
    342 
    343 static	struct {	/* (port, data) pairs; consumer is not visible in this view */
    344 	int	port;
    345 	uchar_t	data;
    346 } sitka_bmc[] = {
        	/* first message, per the entry comments: SET_WATCHDOG_TIMER */
    347 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
    348 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
    349 	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
    350 	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
    351 	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
    352 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
    353 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
    354 	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
    355 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
    356 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */
    357 
        	/* second message, per the entry comments: RESET_WATCHDOG_TIMER */
    358 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
    359 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
    360 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
    361 	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
    362 };
    363 
    364 /* Patchable global variables. */
    365 int		apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
        /* NOTE(review): consumer of apic_divide_reg_init is outside this view. */
    366 uint32_t	apic_divide_reg_init = 0;	/* 0 - divide by 2 */
    367 
    368 /* default apic ops without interrupt remapping */
        /*
         * The first five slots are filled with return_instr cast to each
         * slot's function type; the last two record the IOAPIC RDT entry and
         * the MSI registers.  The initializer is positional.
         */
    369 static apic_intrmap_ops_t apic_nointrmap_ops = {
    370 	(int (*)(int))return_instr,
    371 	(void (*)(int))return_instr,
    372 	(void (*)(apic_irq_t *))return_instr,
    373 	(void (*)(apic_irq_t *, void *))return_instr,
    374 	(void (*)(apic_irq_t *))return_instr,
    375 	apic_record_ioapic_rdt,
    376 	apic_record_msi,
    377 };
    378 
        /* Active interrupt-remapping ops; starts out pointing at the no-remap defaults. */
    379 apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops;
    380 
    381 /*
    382  *	This is the loadable module wrapper
    383  */
    384 
    385 int
    386 _init(void)
    387 {
    388 	if (apic_coarse_hrtime)
    389 		apic_ops.psm_gethrtime = &apic_gettime;
    390 	return (psm_mod_init(&apic_hdlp, &apic_psm_info));
    391 }
    392 
    393 int
    394 _fini(void)
    395 {
    396 	return (psm_mod_fini(&apic_hdlp, &apic_psm_info));
    397 }
    398 
    399 int
    400 _info(struct modinfo *modinfop)
    401 {
    402 	return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop));
    403 }
    404 
    405 
    406 static int
    407 apic_probe()
    408 {
    409 	return (apic_probe_common(apic_psm_info.p_mach_idstring));
    410 }
    411 
    412 void
    413 apic_init()
    414 {
    415 	int i;
    416 	int	j = 1;
    417 
    418 	apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */
    419 	for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) {
    420 		if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) &&
    421 		    (apic_vectortoipl[i + 1] == apic_vectortoipl[i]))
    422 			/* get to highest vector at the same ipl */
    423 			continue;
    424 		for (; j <= apic_vectortoipl[i]; j++) {
    425 			apic_ipltopri[j] = (i << APIC_IPL_SHIFT) +
    426 			    APIC_BASE_VECT;
    427 		}
    428 	}
    429 	for (; j < MAXIPL + 1; j++)
    430 		/* fill up any empty ipltopri slots */
    431 		apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT;
    432 	apic_init_common();
    433 #if defined(__amd64)
    434 	/*
    435 	 * Make cpu-specific interrupt info point to cr8pri vector
    436 	 */
    437 	for (i = 0; i <= MAXIPL; i++)
    438 		apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT;
    439 	CPU->cpu_pri_data = apic_cr8pri;
    440 #else
    441 	if (cpuid_have_cr8access(CPU))
    442 		apic_have_32bit_cr8 = 1;
    443 #endif	/* __amd64 */
    444 }
    445 
    446 /*
    447  * handler for APIC Error interrupt. Just print a warning and continue
    448  */
    449 static int
    450 apic_error_intr()
    451 {
    452 	uint_t	error0, error1, error;
    453 	uint_t	i;
    454 
    455 	/*
    456 	 * We need to write before read as per 7.4.17 of system prog manual.
    457 	 * We do both and or the results to be safe
    458 	 */
    459 	error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
    460 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
    461 	error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
    462 	error = error0 | error1;
    463 
    464 	/*
    465 	 * Clear the APIC error status (do this on all cpus that enter here)
    466 	 * (two writes are required due to the semantics of accessing the
    467 	 * error status register.)
    468 	 */
    469 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
    470 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
    471 
    472 	/*
    473 	 * Prevent more than 1 CPU from handling error interrupt causing
    474 	 * double printing (interleave of characters from multiple
    475 	 * CPU's when using prom_printf)
    476 	 */
    477 	if (lock_try(&apic_error_lock) == 0)
    478 		return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
    479 	if (error) {
    480 #if	DEBUG
    481 		if (apic_debug)
    482 			debug_enter("pcplusmp: APIC Error interrupt received");
    483 #endif /* DEBUG */
    484 		if (apic_panic_on_apic_error)
    485 			cmn_err(CE_PANIC,
    486 			    "APIC Error interrupt on CPU %d. Status = %x\n",
    487 			    psm_get_cpu_id(), error);
    488 		else {
    489 			if ((error & ~APIC_CS_ERRORS) == 0) {
    490 				/* cksum error only */
    491 				apic_error |= APIC_ERR_APIC_ERROR;
    492 				apic_apic_error |= error;
    493 				apic_num_apic_errors++;
    494 				apic_num_cksum_errors++;
    495 			} else {
    496 				/*
    497 				 * prom_printf is the best shot we have of
    498 				 * something which is problem free from
    499 				 * high level/NMI type of interrupts
    500 				 */
    501 				prom_printf("APIC Error interrupt on CPU %d. "
    502 				    "Status 0 = %x, Status 1 = %x\n",
    503 				    psm_get_cpu_id(), error0, error1);
    504 				apic_error |= APIC_ERR_APIC_ERROR;
    505 				apic_apic_error |= error;
    506 				apic_num_apic_errors++;
    507 				for (i = 0; i < apic_error_display_delay; i++) {
    508 					tenmicrosec();
    509 				}
    510 				/*
    511 				 * provide more delay next time limited to
    512 				 * roughly 1 clock tick time
    513 				 */
    514 				if (apic_error_display_delay < 500)
    515 					apic_error_display_delay *= 2;
    516 			}
    517 		}
    518 		lock_clear(&apic_error_lock);
    519 		return (DDI_INTR_CLAIMED);
    520 	} else {
    521 		lock_clear(&apic_error_lock);
    522 		return (DDI_INTR_UNCLAIMED);
    523 	}
    524 	/* NOTREACHED */
    525 }
    526 
    527 /*
    528  * Turn off the mask bit in the performance counter Local Vector Table entry.
    529  */
    530 static void
    531 apic_cpcovf_mask_clear(void)
    532 {
    533 	apic_reg_ops->apic_write(APIC_PCINT_VECT,
    534 	    (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
    535 }
    536 
    537 /*ARGSUSED*/
    538 static int
    539 apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
    540 {
    541 	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
    542 	return (0);
    543 }
    544 
    545 /*ARGSUSED*/
    546 static int
    547 apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
    548 {
    549 	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
    550 	return (0);
    551 }
    552 
    553 /*ARGSUSED*/
    554 static int
    555 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
    556 {
    557 	cpuset_t	cpu_set;
    558 
    559 	CPUSET_ONLY(cpu_set, cpuid);
    560 
    561 	switch (what) {
    562 		case CPU_ON:
    563 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
    564 			    (xc_func_t)apic_cmci_enable);
    565 			break;
    566 
    567 		case CPU_OFF:
    568 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
    569 			    (xc_func_t)apic_cmci_disable);
    570 			break;
    571 
    572 		default:
    573 			break;
    574 	}
    575 
    576 	return (0);
    577 }
    578 
    579 static void
    580 apic_init_intr()
    581 {
    582 	processorid_t	cpun = psm_get_cpu_id();
    583 	uint_t nlvt;
    584 	uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;
    585 
    586 	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
    587 
    588 	if (apic_mode == LOCAL_APIC) {
    589 		/*
    590 		 * We are running APIC in MMIO mode.
    591 		 */
    592 		if (apic_flat_model) {
    593 			apic_reg_ops->apic_write(APIC_FORMAT_REG,
    594 			    APIC_FLAT_MODEL);
    595 		} else {
    596 			apic_reg_ops->apic_write(APIC_FORMAT_REG,
    597 			    APIC_CLUSTER_MODEL);
    598 		}
    599 
    600 		apic_reg_ops->apic_write(APIC_DEST_REG,
    601 		    AV_HIGH_ORDER >> cpun);
    602 	}
    603 
    604 	if (apic_directed_EOI_supported()) {
    605 		/*
    606 		 * Setting the 12th bit in the Spurious Interrupt Vector
    607 		 * Register suppresses broadcast EOIs generated by the local
    608 		 * APIC. The suppression of broadcast EOIs happens only when
    609 		 * interrupts are level-triggered.
    610 		 */
    611 		svr |= APIC_SVR_SUPPRESS_BROADCAST_EOI;
    612 	}
    613 
    614 	/* need to enable APIC before unmasking NMI */
    615 	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, svr);
    616 
    617 	/*
    618 	 * Presence of an invalid vector with delivery mode AV_FIXED can
    619 	 * cause an error interrupt, even if the entry is masked...so
    620 	 * write a valid vector to LVT entries along with the mask bit
    621 	 */
    622 
    623 	/* All APICs have timer and LINT0/1 */
    624 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK|APIC_RESV_IRQ);
    625 	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK|APIC_RESV_IRQ);
    626 	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_NMI);	/* enable NMI */
    627 
    628 	/*
    629 	 * On integrated APICs, the number of LVT entries is
    630 	 * 'Max LVT entry' + 1; on 82489DX's (non-integrated
    631 	 * APICs), nlvt is "3" (LINT0, LINT1, and timer)
    632 	 */
    633 
    634 	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
    635 		nlvt = 3;
    636 	} else {
    637 		nlvt = ((apic_reg_ops->apic_read(APIC_VERS_REG) >> 16) &
    638 		    0xFF) + 1;
    639 	}
    640 
    641 	if (nlvt >= 5) {
    642 		/* Enable performance counter overflow interrupt */
    643 
    644 		if ((x86_feature & X86_MSR) != X86_MSR)
    645 			apic_enable_cpcovf_intr = 0;
    646 		if (apic_enable_cpcovf_intr) {
    647 			if (apic_cpcovf_vect == 0) {
    648 				int ipl = APIC_PCINT_IPL;
    649 				int irq = apic_get_ipivect(ipl, -1);
    650 
    651 				ASSERT(irq != -1);
    652 				apic_cpcovf_vect =
    653 				    apic_irq_table[irq]->airq_vector;
    654 				ASSERT(apic_cpcovf_vect);
    655 				(void) add_avintr(NULL, ipl,
    656 				    (avfunc)kcpc_hw_overflow_intr,
    657 				    "apic pcint", irq, NULL, NULL, NULL, NULL);
    658 				kcpc_hw_overflow_intr_installed = 1;
    659 				kcpc_hw_enable_cpc_intr =
    660 				    apic_cpcovf_mask_clear;
    661 			}
    662 			apic_reg_ops->apic_write(APIC_PCINT_VECT,
    663 			    apic_cpcovf_vect);
    664 		}
    665 	}
    666 
    667 	if (nlvt >= 6) {
    668 		/* Only mask TM intr if the BIOS apparently doesn't use it */
    669 
    670 		uint32_t lvtval;
    671 
    672 		lvtval = apic_reg_ops->apic_read(APIC_THERM_VECT);
    673 		if (((lvtval & AV_MASK) == AV_MASK) ||
    674 		    ((lvtval & AV_DELIV_MODE) != AV_SMI)) {
    675 			apic_reg_ops->apic_write(APIC_THERM_VECT,
    676 			    AV_MASK|APIC_RESV_IRQ);
    677 		}
    678 	}
    679 
    680 	/* Enable error interrupt */
    681 
    682 	if (nlvt >= 4 && apic_enable_error_intr) {
    683 		if (apic_errvect == 0) {
    684 			int ipl = 0xf;	/* get highest priority intr */
    685 			int irq = apic_get_ipivect(ipl, -1);
    686 
    687 			ASSERT(irq != -1);
    688 			apic_errvect = apic_irq_table[irq]->airq_vector;
    689 			ASSERT(apic_errvect);
    690 			/*
    691 			 * Not PSMI compliant, but we are going to merge
    692 			 * with ON anyway
    693 			 */
    694 			(void) add_avintr((void *)NULL, ipl,
    695 			    (avfunc)apic_error_intr, "apic error intr",
    696 			    irq, NULL, NULL, NULL, NULL);
    697 		}
    698 		apic_reg_ops->apic_write(APIC_ERR_VECT, apic_errvect);
    699 		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
    700 		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
    701 	}
    702 
    703 	/* Enable CMCI interrupt */
    704 	if (cmi_enable_cmci) {
    705 
    706 		mutex_enter(&cmci_cpu_setup_lock);
    707 		if (cmci_cpu_setup_registered == 0) {
    708 			mutex_enter(&cpu_lock);
    709 			register_cpu_setup_func(cmci_cpu_setup, NULL);
    710 			mutex_exit(&cpu_lock);
    711 			cmci_cpu_setup_registered = 1;
    712 		}
    713 		mutex_exit(&cmci_cpu_setup_lock);
    714 
    715 		if (apic_cmci_vect == 0) {
    716 			int ipl = 0x2;
    717 			int irq = apic_get_ipivect(ipl, -1);
    718 
    719 			ASSERT(irq != -1);
    720 			apic_cmci_vect = apic_irq_table[irq]->airq_vector;
    721 			ASSERT(apic_cmci_vect);
    722 
    723 			(void) add_avintr(NULL, ipl,
    724 			    (avfunc)cmi_cmci_trap,
    725 			    "apic cmci intr", irq, NULL, NULL, NULL, NULL);
    726 		}
    727 		apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
    728 	}
    729 }
    730 
    731 static void
    732 apic_disable_local_apic()
    733 {
    734 	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
    735 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);
    736 
    737 	/* local intr reg 0 */
    738 	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);
    739 
    740 	/* disable NMI */
    741 	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);
    742 
    743 	/* and error interrupt */
    744 	apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);
    745 
    746 	/* and perf counter intr */
    747 	apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);
    748 
    749 	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
    750 }
    751 
    752 static void
    753 apic_picinit(void)
    754 {
    755 	int i, j;
    756 	uint_t isr;
    757 
    758 	/*
    759 	 * Initialize and enable interrupt remapping before apic
    760 	 * hardware initialization
    761 	 */
    762 	apic_intrmap_init(apic_mode);
    763 
    764 	/*
    765 	 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
    766 	 * bit on without clearing it with EOI.  Since softint
    767 	 * uses vector 0x20 to interrupt itself, so softint will
    768 	 * not work on this machine.  In order to fix this problem
    769 	 * a check is made to verify all the isr bits are clear.
    770 	 * If not, EOIs are issued to clear the bits.
    771 	 */
    772 	for (i = 7; i >= 1; i--) {
    773 		isr = apic_reg_ops->apic_read(APIC_ISR_REG + (i * 4));
    774 		if (isr != 0)
    775 			for (j = 0; ((j < 32) && (isr != 0)); j++)
    776 				if (isr & (1 << j)) {
    777 					apic_reg_ops->apic_write(
    778 					    APIC_EOI_REG, 0);
    779 					isr &= ~(1 << j);
    780 					apic_error |= APIC_ERR_BOOT_EOI;
    781 				}
    782 	}
    783 
    784 	/* set a flag so we know we have run apic_picinit() */
    785 	apic_picinit_called = 1;
    786 	LOCK_INIT_CLEAR(&apic_gethrtime_lock);
    787 	LOCK_INIT_CLEAR(&apic_ioapic_lock);
    788 	LOCK_INIT_CLEAR(&apic_error_lock);
    789 
    790 	picsetup();	 /* initialise the 8259 */
    791 
    792 	/* add nmi handler - least priority nmi handler */
    793 	LOCK_INIT_CLEAR(&apic_nmi_lock);
    794 
    795 	if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
    796 	    "pcplusmp NMI handler", (caddr_t)NULL))
    797 		cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler");
    798 
    799 	/*
    800 	 * Check for directed-EOI capability in the local APIC.
    801 	 */
    802 	if (apic_directed_EOI_supported() == 1) {
    803 		apic_set_directed_EOI_handler();
    804 	}
    805 
    806 	apic_init_intr();
    807 
    808 	/* enable apic mode if imcr present */
    809 	if (apic_imcrp) {
    810 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
    811 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
    812 	}
    813 
    814 	ioapic_init_intr(IOAPIC_MASK);
    815 }
    816 
    817 
    818 /*ARGSUSED1*/
    819 static int
    820 apic_cpu_start(processorid_t cpun, caddr_t arg)
    821 {
    822 	int		loop_count;
    823 	uint32_t	vector;
    824 	uint_t		cpu_id;
    825 	ulong_t		iflag;
    826 
    827 	cpu_id =  apic_cpus[cpun].aci_local_id;
    828 
    829 	apic_cmos_ssb_set = 1;
    830 
    831 	/*
    832 	 * Interrupts on BSP cpu will be disabled during these startup
    833 	 * steps in order to avoid unwanted side effects from
    834 	 * executing interrupt handlers on a problematic BIOS.
    835 	 */
    836 
    837 	iflag = intr_clear();
    838 	outb(CMOS_ADDR, SSB);
    839 	outb(CMOS_DATA, BIOS_SHUTDOWN);
    840 
    841 	/*
    842 	 * According to X2APIC specification in section '2.3.5.1' of
    843 	 * Interrupt Command Register Semantics, the semantics of
    844 	 * programming the Interrupt Command Register to dispatch an interrupt
    845 	 * is simplified. A single MSR write to the 64-bit ICR is required
    846 	 * for dispatching an interrupt. Specifically, with the 64-bit MSR
    847 	 * interface to ICR, system software is not required to check the
    848 	 * status of the delivery status bit prior to writing to the ICR
    849 	 * to send an IPI. With the removal of the Delivery Status bit,
    850 	 * system software no longer has a reason to read the ICR. It remains
    851 	 * readable only to aid in debugging.
    852 	 */
    853 #ifdef	DEBUG
    854 	APIC_AV_PENDING_SET();
    855 #else
    856 	if (apic_mode == LOCAL_APIC) {
    857 		APIC_AV_PENDING_SET();
    858 	}
    859 #endif /* DEBUG */
    860 
    861 	/* for integrated - make sure there is one INIT IPI in buffer */
    862 	/* for external - it will wake up the cpu */
    863 	apic_reg_ops->apic_write_int_cmd(cpu_id, AV_ASSERT | AV_RESET);
    864 
    865 	/* If only 1 CPU is installed, PENDING bit will not go low */
    866 	for (loop_count = 0x1000; loop_count; loop_count--) {
    867 		if (apic_mode == LOCAL_APIC &&
    868 		    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
    869 			apic_ret();
    870 		else
    871 			break;
    872 	}
    873 
    874 	apic_reg_ops->apic_write_int_cmd(cpu_id, AV_DEASSERT | AV_RESET);
    875 
    876 	drv_usecwait(20000);		/* 20 milli sec */
    877 
    878 	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
    879 		/* integrated apic */
    880 
    881 		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
    882 		    (APIC_VECTOR_MASK | APIC_IPL_MASK);
    883 
    884 		/* to offset the INIT IPI queue up in the buffer */
    885 		apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);
    886 
    887 		drv_usecwait(200);		/* 20 micro sec */
    888 
    889 		apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);
    890 
    891 		drv_usecwait(200);		/* 20 micro sec */
    892 	}
    893 	intr_restore(iflag);
    894 	return (0);
    895 }
    896 
    897 
    898 #ifdef	DEBUG
    899 int	apic_break_on_cpu = 9;
    900 int	apic_stretch_interrupts = 0;
    901 int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */
    902 
    903 void
    904 apic_break()
    905 {
    906 }
    907 #endif /* DEBUG */
    908 
    909 /*
    910  * platform_intr_enter
    911  *
    912  *	Called at the beginning of the interrupt service routine to
    913  *	mask all level equal to and below the interrupt priority
    914  *	of the interrupting vector.  An EOI should be given to
    915  *	the interrupt controller to enable other HW interrupts.
    916  *
    917  *	Return -1 for spurious interrupts
    918  *
    919  */
    920 /*ARGSUSED*/
    921 static int
    922 apic_intr_enter(int ipl, int *vectorp)
    923 {
    924 	uchar_t vector;
    925 	int nipl;
    926 	int irq;
    927 	ulong_t iflag;
    928 	apic_cpus_info_t *cpu_infop;
    929 
    930 	/*
    931 	 * The real vector delivered is (*vectorp + 0x20), but our caller
    932 	 * subtracts 0x20 from the vector before passing it to us.
    933 	 * (That's why APIC_BASE_VECT is 0x20.)
    934 	 */
    935 	vector = (uchar_t)*vectorp;
    936 
    937 	/* if interrupted by the clock, increment apic_nsec_since_boot */
    938 	if (vector == apic_clkvect) {
    939 		if (!apic_oneshot) {
    940 			/* NOTE: this is not MT aware */
    941 			apic_hrtime_stamp++;
    942 			apic_nsec_since_boot += apic_nsec_per_intr;
    943 			apic_hrtime_stamp++;
    944 			last_count_read = apic_hertz_count;
    945 			apic_redistribute_compute();
    946 		}
    947 
    948 		/* We will avoid all the book keeping overhead for clock */
    949 		nipl = apic_ipls[vector];
    950 
    951 		*vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT];
    952 		if (apic_mode == LOCAL_APIC) {
    953 #if defined(__amd64)
    954 			setcr8((ulong_t)(apic_ipltopri[nipl] >>
    955 			    APIC_IPL_SHIFT));
    956 #else
    957 			if (apic_have_32bit_cr8)
    958 				setcr8((ulong_t)(apic_ipltopri[nipl] >>
    959 				    APIC_IPL_SHIFT));
    960 			else
    961 				LOCAL_APIC_WRITE_REG(APIC_TASK_REG,
    962 				    (uint32_t)apic_ipltopri[nipl]);
    963 #endif
    964 			LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
    965 		} else {
    966 			X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[nipl]);
    967 			X2APIC_WRITE(APIC_EOI_REG, 0);
    968 		}
    969 
    970 		return (nipl);
    971 	}
    972 
    973 	cpu_infop = &apic_cpus[psm_get_cpu_id()];
    974 
    975 	if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) {
    976 		cpu_infop->aci_spur_cnt++;
    977 		return (APIC_INT_SPURIOUS);
    978 	}
    979 
    980 	/* Check if the vector we got is really what we need */
    981 	if (apic_revector_pending) {
    982 		/*
    983 		 * Disable interrupts for the duration of
    984 		 * the vector translation to prevent a self-race for
    985 		 * the apic_revector_lock.  This cannot be done
    986 		 * in apic_xlate_vector because it is recursive and
    987 		 * we want the vector translation to be atomic with
    988 		 * respect to other (higher-priority) interrupts.
    989 		 */
    990 		iflag = intr_clear();
    991 		vector = apic_xlate_vector(vector + APIC_BASE_VECT) -
    992 		    APIC_BASE_VECT;
    993 		intr_restore(iflag);
    994 	}
    995 
    996 	nipl = apic_ipls[vector];
    997 	*vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT];
    998 
    999 	if (apic_mode == LOCAL_APIC) {
   1000 #if defined(__amd64)
   1001 		setcr8((ulong_t)(apic_ipltopri[nipl] >> APIC_IPL_SHIFT));
   1002 #else
   1003 		if (apic_have_32bit_cr8)
   1004 			setcr8((ulong_t)(apic_ipltopri[nipl] >>
   1005 			    APIC_IPL_SHIFT));
   1006 		else
   1007 			LOCAL_APIC_WRITE_REG(APIC_TASK_REG,
   1008 			    (uint32_t)apic_ipltopri[nipl]);
   1009 #endif
   1010 	} else {
   1011 		X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[nipl]);
   1012 	}
   1013 
   1014 	cpu_infop->aci_current[nipl] = (uchar_t)irq;
   1015 	cpu_infop->aci_curipl = (uchar_t)nipl;
   1016 	cpu_infop->aci_ISR_in_progress |= 1 << nipl;
   1017 
   1018 	/*
   1019 	 * apic_level_intr could have been assimilated into the irq struct.
   1020 	 * but, having it as a character array is more efficient in terms of
   1021 	 * cache usage. So, we leave it as is.
   1022 	 */
   1023 	if (!apic_level_intr[irq]) {
   1024 		if (apic_mode == LOCAL_APIC) {
   1025 			LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
   1026 		} else {
   1027 			X2APIC_WRITE(APIC_EOI_REG, 0);
   1028 		}
   1029 	}
   1030 
   1031 #ifdef	DEBUG
   1032 	APIC_DEBUG_BUF_PUT(vector);
   1033 	APIC_DEBUG_BUF_PUT(irq);
   1034 	APIC_DEBUG_BUF_PUT(nipl);
   1035 	APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
   1036 	if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
   1037 		drv_usecwait(apic_stretch_interrupts);
   1038 
   1039 	if (apic_break_on_cpu == psm_get_cpu_id())
   1040 		apic_break();
   1041 #endif /* DEBUG */
   1042 	return (nipl);
   1043 }
   1044 
   1045 /*
   1046  * This macro is a common code used by MMIO local apic and X2APIC
   1047  * local apic.
   1048  */
   1049 #define	APIC_INTR_EXIT() \
   1050 { \
   1051 	cpu_infop = &apic_cpus[psm_get_cpu_id()]; \
   1052 	if (apic_level_intr[irq]) \
   1053 		apic_reg_ops->apic_send_eoi(irq); \
   1054 	cpu_infop->aci_curipl = (uchar_t)prev_ipl; \
   1055 	/* ISR above current pri could not be in progress */ \
   1056 	cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1; \
   1057 }
   1058 
   1059 /*
   1060  * Any changes made to this function must also change X2APIC
   1061  * version of intr_exit.
   1062  */
   1063 void
   1064 apic_intr_exit(int prev_ipl, int irq)
   1065 {
   1066 	apic_cpus_info_t *cpu_infop;
   1067 
   1068 #if defined(__amd64)
   1069 	setcr8((ulong_t)apic_cr8pri[prev_ipl]);
   1070 #else
   1071 	if (apic_have_32bit_cr8)
   1072 		setcr8((ulong_t)(apic_ipltopri[prev_ipl] >> APIC_IPL_SHIFT));
   1073 	else
   1074 		apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl];
   1075 #endif
   1076 
   1077 	APIC_INTR_EXIT();
   1078 }
   1079 
   1080 /*
   1081  * Same as apic_intr_exit() except it uses MSR rather than MMIO
   1082  * to access local apic registers.
   1083  */
   1084 void
   1085 x2apic_intr_exit(int prev_ipl, int irq)
   1086 {
   1087 	apic_cpus_info_t *cpu_infop;
   1088 
   1089 	X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[prev_ipl]);
   1090 	APIC_INTR_EXIT();
   1091 }
   1092 
   1093 intr_exit_fn_t
   1094 psm_intr_exit_fn(void)
   1095 {
   1096 	if (apic_mode == LOCAL_X2APIC)
   1097 		return (x2apic_intr_exit);
   1098 
   1099 	return (apic_intr_exit);
   1100 }
   1101 
   1102 /*
   1103  * Mask all interrupts below or equal to the given IPL.
   1104  * Any changes made to this function must also change X2APIC
   1105  * version of setspl.
   1106  */
   1107 static void
   1108 apic_setspl(int ipl)
   1109 {
   1110 #if defined(__amd64)
   1111 	setcr8((ulong_t)apic_cr8pri[ipl]);
   1112 #else
   1113 	if (apic_have_32bit_cr8)
   1114 		setcr8((ulong_t)(apic_ipltopri[ipl] >> APIC_IPL_SHIFT));
   1115 	else
   1116 		apicadr[APIC_TASK_REG] = apic_ipltopri[ipl];
   1117 #endif
   1118 
   1119 	/* interrupts at ipl above this cannot be in progress */
   1120 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
   1121 	/*
   1122 	 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts
   1123 	 * have enough time to come in before the priority is raised again
   1124 	 * during the idle() loop.
   1125 	 */
   1126 	if (apic_setspl_delay)
   1127 		(void) apic_reg_ops->apic_get_pri();
   1128 }
   1129 
   1130 /*
   1131  * X2APIC version of setspl.
   1132  * Mask all interrupts below or equal to the given IPL
   1133  */
   1134 static void
   1135 x2apic_setspl(int ipl)
   1136 {
   1137 	X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[ipl]);
   1138 
   1139 	/* interrupts at ipl above this cannot be in progress */
   1140 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
   1141 }
   1142 
   1143 /*
   1144  * generates an interprocessor interrupt to another CPU. Any changes made to
   1145  * this routine must be accompanied by similar changes to
   1146  * apic_common_send_ipi().
   1147  */
   1148 static void
   1149 apic_send_ipi(int cpun, int ipl)
   1150 {
   1151 	int vector;
   1152 	ulong_t flag;
   1153 
   1154 	vector = apic_resv_vector[ipl];
   1155 
   1156 	ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
   1157 
   1158 	flag = intr_clear();
   1159 
   1160 	APIC_AV_PENDING_SET();
   1161 
   1162 	apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
   1163 	    vector);
   1164 
   1165 	intr_restore(flag);
   1166 }
   1167 
   1168 
   1169 /*ARGSUSED*/
   1170 static void
   1171 apic_set_idlecpu(processorid_t cpun)
   1172 {
   1173 }
   1174 
   1175 /*ARGSUSED*/
   1176 static void
   1177 apic_unset_idlecpu(processorid_t cpun)
   1178 {
   1179 }
   1180 
   1181 
   1182 void
   1183 apic_ret()
   1184 {
   1185 }
   1186 
   1187 /*
   1188  * If apic_coarse_time == 1, then apic_gettime() is used instead of
   1189  * apic_gethrtime().  This is used for performance instead of accuracy.
   1190  */
   1191 
   1192 static hrtime_t
   1193 apic_gettime()
   1194 {
   1195 	int old_hrtime_stamp;
   1196 	hrtime_t temp;
   1197 
   1198 	/*
   1199 	 * In one-shot mode, we do not keep time, so if anyone
   1200 	 * calls psm_gettime() directly, we vector over to
   1201 	 * gethrtime().
   1202 	 * one-shot mode MUST NOT be enabled if this psm is the source of
   1203 	 * hrtime.
   1204 	 */
   1205 
   1206 	if (apic_oneshot)
   1207 		return (gethrtime());
   1208 
   1209 
   1210 gettime_again:
   1211 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
   1212 		apic_ret();
   1213 
   1214 	temp = apic_nsec_since_boot;
   1215 
   1216 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
   1217 		goto gettime_again;
   1218 	}
   1219 	return (temp);
   1220 }
   1221 
   1222 /*
   1223  * Here we return the number of nanoseconds since booting.  Note every
   1224  * clock interrupt increments apic_nsec_since_boot by the appropriate
   1225  * amount.
   1226  */
   1227 static hrtime_t
   1228 apic_gethrtime()
   1229 {
   1230 	int curr_timeval, countval, elapsed_ticks;
   1231 	int old_hrtime_stamp, status;
   1232 	hrtime_t temp;
   1233 	uint32_t cpun;
   1234 	ulong_t oflags;
   1235 
   1236 	/*
   1237 	 * In one-shot mode, we do not keep time, so if anyone
   1238 	 * calls psm_gethrtime() directly, we vector over to
   1239 	 * gethrtime().
   1240 	 * one-shot mode MUST NOT be enabled if this psm is the source of
   1241 	 * hrtime.
   1242 	 */
   1243 
   1244 	if (apic_oneshot)
   1245 		return (gethrtime());
   1246 
   1247 	oflags = intr_clear();	/* prevent migration */
   1248 
   1249 	cpun = apic_reg_ops->apic_read(APIC_LID_REG);
   1250 	if (apic_mode == LOCAL_APIC)
   1251 		cpun >>= APIC_ID_BIT_OFFSET;
   1252 
   1253 	lock_set(&apic_gethrtime_lock);
   1254 
   1255 gethrtime_again:
   1256 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
   1257 		apic_ret();
   1258 
   1259 	/*
   1260 	 * Check to see which CPU we are on.  Note the time is kept on
   1261 	 * the local APIC of CPU 0.  If on CPU 0, simply read the current
   1262 	 * counter.  If on another CPU, issue a remote read command to CPU 0.
   1263 	 */
   1264 	if (cpun == apic_cpus[0].aci_local_id) {
   1265 		countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
   1266 	} else {
   1267 #ifdef	DEBUG
   1268 		APIC_AV_PENDING_SET();
   1269 #else
   1270 		if (apic_mode == LOCAL_APIC)
   1271 			APIC_AV_PENDING_SET();
   1272 #endif /* DEBUG */
   1273 
   1274 		apic_reg_ops->apic_write_int_cmd(
   1275 		    apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);
   1276 
   1277 		while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
   1278 		    & AV_READ_PENDING) {
   1279 			apic_ret();
   1280 		}
   1281 
   1282 		if (status & AV_REMOTE_STATUS)	/* 1 = valid */
   1283 			countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
   1284 		else {	/* 0 = invalid */
   1285 			apic_remote_hrterr++;
   1286 			/*
   1287 			 * return last hrtime right now, will need more
   1288 			 * testing if change to retry
   1289 			 */
   1290 			temp = apic_last_hrtime;
   1291 
   1292 			lock_clear(&apic_gethrtime_lock);
   1293 
   1294 			intr_restore(oflags);
   1295 
   1296 			return (temp);
   1297 		}
   1298 	}
   1299 	if (countval > last_count_read)
   1300 		countval = 0;
   1301 	else
   1302 		last_count_read = countval;
   1303 
   1304 	elapsed_ticks = apic_hertz_count - countval;
   1305 
   1306 	curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
   1307 	temp = apic_nsec_since_boot + curr_timeval;
   1308 
   1309 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
   1310 		/* we might have clobbered last_count_read. Restore it */
   1311 		last_count_read = apic_hertz_count;
   1312 		goto gethrtime_again;
   1313 	}
   1314 
   1315 	if (temp < apic_last_hrtime) {
   1316 		/* return last hrtime if error occurs */
   1317 		apic_hrtime_error++;
   1318 		temp = apic_last_hrtime;
   1319 	}
   1320 	else
   1321 		apic_last_hrtime = temp;
   1322 
   1323 	lock_clear(&apic_gethrtime_lock);
   1324 	intr_restore(oflags);
   1325 
   1326 	return (temp);
   1327 }
   1328 
   1329 /* apic NMI handler */
   1330 /*ARGSUSED*/
   1331 static void
   1332 apic_nmi_intr(caddr_t arg, struct regs *rp)
   1333 {
   1334 	if (apic_shutdown_processors) {
   1335 		apic_disable_local_apic();
   1336 		return;
   1337 	}
   1338 
   1339 	apic_error |= APIC_ERR_NMI;
   1340 
   1341 	if (!lock_try(&apic_nmi_lock))
   1342 		return;
   1343 	apic_num_nmis++;
   1344 
   1345 	if (apic_kmdb_on_nmi && psm_debugger()) {
   1346 		debug_enter("NMI received: entering kmdb\n");
   1347 	} else if (apic_panic_on_nmi) {
   1348 		/* Keep panic from entering kmdb. */
   1349 		nopanicdebug = 1;
   1350 		panic("NMI received\n");
   1351 	} else {
   1352 		/*
   1353 		 * prom_printf is the best shot we have of something which is
   1354 		 * problem free from high level/NMI type of interrupts
   1355 		 */
   1356 		prom_printf("NMI received\n");
   1357 	}
   1358 
   1359 	lock_clear(&apic_nmi_lock);
   1360 }
   1361 
   1362 /*ARGSUSED*/
   1363 static int
   1364 apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl)
   1365 {
   1366 	return (apic_addspl_common(irqno, ipl, min_ipl, max_ipl));
   1367 }
   1368 
   1369 static int
   1370 apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl)
   1371 {
   1372 	return (apic_delspl_common(irqno, ipl, min_ipl,  max_ipl));
   1373 }
   1374 
   1375 static int
   1376 apic_post_cpu_start()
   1377 {
   1378 	int cpun;
   1379 	static int cpus_started = 1;
   1380 	struct psm_ops *pops = &apic_ops;
   1381 
   1382 	/* We know this CPU + BSP  started successfully. */
   1383 	cpus_started++;
   1384 
   1385 	/*
   1386 	 * On BSP we would have enabled X2APIC, if supported by processor,
   1387 	 * in acpi_probe(), but on AP we do it here.
   1388 	 *
   1389 	 * We enable X2APIC mode only if BSP is running in X2APIC & the
   1390 	 * local APIC mode of the current CPU is MMIO (xAPIC).
   1391 	 */
   1392 	if (apic_mode == LOCAL_X2APIC && apic_detect_x2apic() &&
   1393 	    apic_local_mode() == LOCAL_APIC) {
   1394 		apic_enable_x2apic();
   1395 	}
   1396 
   1397 	/*
   1398 	 * We change psm_send_ipi and send_dirintf only if Solaris
   1399 	 * is booted in kmdb & the current CPU is the last CPU being
   1400 	 * brought up. We don't need to do anything if Solaris is running
   1401 	 * in MMIO mode (xAPIC).
   1402 	 */
   1403 	if ((boothowto & RB_DEBUG) &&
   1404 	    (cpus_started == boot_ncpus || cpus_started == apic_nproc) &&
   1405 	    apic_mode == LOCAL_X2APIC) {
   1406 		/*
   1407 		 * We no longer need help from apic_common_send_ipi()
   1408 		 * since we will not start any more CPUs.
   1409 		 *
   1410 		 * We will need to revisit this if we start supporting
   1411 		 * hot-plugging of CPUs.
   1412 		 */
   1413 		pops->psm_send_ipi = x2apic_send_ipi;
   1414 		send_dirintf = pops->psm_send_ipi;
   1415 	}
   1416 
   1417 	splx(ipltospl(LOCK_LEVEL));
   1418 	apic_init_intr();
   1419 
   1420 	/*
   1421 	 * since some systems don't enable the internal cache on the non-boot
   1422 	 * cpus, so we have to enable them here
   1423 	 */
   1424 	setcr0(getcr0() & ~(CR0_CD | CR0_NW));
   1425 
   1426 #ifdef	DEBUG
   1427 	APIC_AV_PENDING_SET();
   1428 #else
   1429 	if (apic_mode == LOCAL_APIC)
   1430 		APIC_AV_PENDING_SET();
   1431 #endif	/* DEBUG */
   1432 
   1433 	/*
   1434 	 * We may be booting, or resuming from suspend; aci_status will
   1435 	 * be APIC_CPU_INTR_ENABLE if coming from suspend, so we add the
   1436 	 * APIC_CPU_ONLINE flag here rather than setting aci_status completely.
   1437 	 */
   1438 	cpun = psm_get_cpu_id();
   1439 	apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
   1440 
   1441 	apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
   1442 	return (PSM_SUCCESS);
   1443 }
   1444 
   1445 processorid_t
   1446 apic_get_next_processorid(processorid_t cpu_id)
   1447 {
   1448 
   1449 	int i;
   1450 
   1451 	if (cpu_id == -1)
   1452 		return ((processorid_t)0);
   1453 
   1454 	for (i = cpu_id + 1; i < NCPU; i++) {
   1455 		if (CPU_IN_SET(apic_cpumask, i))
   1456 			return (i);
   1457 	}
   1458 
   1459 	return ((processorid_t)-1);
   1460 }
   1461 
   1462 
   1463 /*
   1464  * type == -1 indicates it is an internal request. Do not change
   1465  * resv_vector for these requests
   1466  */
   1467 static int
   1468 apic_get_ipivect(int ipl, int type)
   1469 {
   1470 	uchar_t vector;
   1471 	int irq;
   1472 
   1473 	if ((irq = apic_allocate_irq(APIC_VECTOR(ipl))) != -1) {
   1474 		if (vector = apic_allocate_vector(ipl, irq, 1)) {
   1475 			apic_irq_table[irq]->airq_mps_intr_index =
   1476 			    RESERVE_INDEX;
   1477 			apic_irq_table[irq]->airq_vector = vector;
   1478 			if (type != -1) {
   1479 				apic_resv_vector[ipl] = vector;
   1480 			}
   1481 			return (irq);
   1482 		}
   1483 	}
   1484 	apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
   1485 	return (-1);	/* shouldn't happen */
   1486 }
   1487 
   1488 static int
   1489 apic_getclkirq(int ipl)
   1490 {
   1491 	int	irq;
   1492 
   1493 	if ((irq = apic_get_ipivect(ipl, -1)) == -1)
   1494 		return (-1);
   1495 	/*
   1496 	 * Note the vector in apic_clkvect for per clock handling.
   1497 	 */
   1498 	apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT;
   1499 	APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n",
   1500 	    apic_clkvect));
   1501 	return (irq);
   1502 }
   1503 
   1504 
   1505 /*
   1506  * Return the number of APIC clock ticks elapsed for 8245 to decrement
   1507  * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
   1508  */
   1509 static uint_t
   1510 apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
   1511 {
   1512 	uint8_t		pit_tick_lo;
   1513 	uint16_t	pit_tick, target_pit_tick;
   1514 	uint32_t	start_apic_tick, end_apic_tick;
   1515 	ulong_t		iflag;
   1516 	uint32_t	reg;
   1517 
   1518 	reg = addr + APIC_CURR_COUNT - apicadr;
   1519 
   1520 	iflag = intr_clear();
   1521 
   1522 	do {
   1523 		pit_tick_lo = inb(PITCTR0_PORT);
   1524 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
   1525 	} while (pit_tick < APIC_TIME_MIN ||
   1526 	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);
   1527 
   1528 	/*
   1529 	 * Wait for the 8254 to decrement by 5 ticks to ensure
   1530 	 * we didn't start in the middle of a tick.
   1531 	 * Compare with 0x10 for the wrap around case.
   1532 	 */
   1533 	target_pit_tick = pit_tick - 5;
   1534 	do {
   1535 		pit_tick_lo = inb(PITCTR0_PORT);
   1536 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
   1537 	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
   1538 
   1539 	start_apic_tick = apic_reg_ops->apic_read(reg);
   1540 
   1541 	/*
   1542 	 * Wait for the 8254 to decrement by
   1543 	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
   1544 	 */
   1545 	target_pit_tick = pit_tick - APIC_TIME_COUNT;
   1546 	do {
   1547 		pit_tick_lo = inb(PITCTR0_PORT);
   1548 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
   1549 	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
   1550 
   1551 	end_apic_tick = apic_reg_ops->apic_read(reg);
   1552 
   1553 	*pit_ticks_adj = target_pit_tick - pit_tick;
   1554 
   1555 	intr_restore(iflag);
   1556 
   1557 	return (start_apic_tick - end_apic_tick);
   1558 }
   1559 
   1560 /*
   1561  * Initialise the APIC timer on the local APIC of CPU 0 to the desired
   1562  * frequency.  Note at this stage in the boot sequence, the boot processor
   1563  * is the only active processor.
   1564  * hertz value of 0 indicates a one-shot mode request.  In this case
   1565  * the function returns the resolution (in nanoseconds) for the hardware
   1566  * timer interrupt.  If one-shot mode capability is not available,
   1567  * the return value will be 0. apic_enable_oneshot is a global switch
   1568  * for disabling the functionality.
   1569  * A non-zero positive value for hertz indicates a periodic mode request.
   1570  * In this case the hardware will be programmed to generate clock interrupts
   1571  * at hertz frequency and returns the resolution of interrupts in
   1572  * nanosecond.
   1573  */
   1574 
   1575 static int
   1576 apic_clkinit(int hertz)
   1577 {
   1578 	uint_t		apic_ticks = 0;
   1579 	uint_t		pit_ticks;
   1580 	int		ret;
   1581 	uint16_t	pit_ticks_adj;
   1582 	static int	firsttime = 1;
   1583 
   1584 	if (firsttime) {
   1585 		/* first time calibrate on CPU0 only */
   1586 
   1587 		apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
   1588 		apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
   1589 		apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj);
   1590 
   1591 		/* total number of PIT ticks corresponding to apic_ticks */
   1592 		pit_ticks = APIC_TIME_COUNT + pit_ticks_adj;
   1593 
   1594 		/*
   1595 		 * Determine the number of nanoseconds per APIC clock tick
   1596 		 * and then determine how many APIC ticks to interrupt at the
   1597 		 * desired frequency
   1598 		 * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s
   1599 		 * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s
   1600 		 * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9)
   1601 		 * pic_ticks_per_SFns =
   1602 		 *   (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9)
   1603 		 */
   1604 		apic_ticks_per_SFnsecs =
   1605 		    ((SF * apic_ticks * PIT_HZ) /
   1606 		    ((uint64_t)pit_ticks * NANOSEC));
   1607 
   1608 		/* the interval timer initial count is 32 bit max */
   1609 		apic_nsec_max = APIC_TICKS_TO_NSECS(APIC_MAXVAL);
   1610 		firsttime = 0;
   1611 	}
   1612 
   1613 	if (hertz != 0) {
   1614 		/* periodic */
   1615 		apic_nsec_per_intr = NANOSEC / hertz;
   1616 		apic_hertz_count = APIC_NSECS_TO_TICKS(apic_nsec_per_intr);
   1617 	}
   1618 
   1619 	apic_int_busy_mark = (apic_int_busy_mark *
   1620 	    apic_sample_factor_redistribution) / 100;
   1621 	apic_int_free_mark = (apic_int_free_mark *
   1622 	    apic_sample_factor_redistribution) / 100;
   1623 	apic_diff_for_redistribution = (apic_diff_for_redistribution *
   1624 	    apic_sample_factor_redistribution) / 100;
   1625 
   1626 	if (hertz == 0) {
   1627 		/* requested one_shot */
   1628 		if (!tsc_gethrtime_enable || !apic_oneshot_enable)
   1629 			return (0);
   1630 		apic_oneshot = 1;
   1631 		ret = (int)APIC_TICKS_TO_NSECS(1);
   1632 	} else {
   1633 		/* program the local APIC to interrupt at the given frequency */
   1634 		apic_reg_ops->apic_write(APIC_INIT_COUNT, apic_hertz_count);
   1635 		apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
   1636 		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME);
   1637 		apic_oneshot = 0;
   1638 		ret = NANOSEC / hertz;
   1639 	}
   1640 
   1641 	return (ret);
   1642 
   1643 }
   1644 
   1645 /*
   1646  * apic_preshutdown:
   1647  * Called early in shutdown whilst we can still access filesystems to do
   1648  * things like loading modules which will be required to complete shutdown
   1649  * after filesystems are all unmounted.
   1650  */
   1651 static void
   1652 apic_preshutdown(int cmd, int fcn)
   1653 {
   1654 	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
   1655 	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));
   1656 
   1657 	if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) {
   1658 		return;
   1659 	}
   1660 }
   1661 
   1662 static void
   1663 apic_shutdown(int cmd, int fcn)
   1664 {
   1665 	int restarts, attempts;
   1666 	int i;
   1667 	uchar_t	byte;
   1668 	ulong_t iflag;
   1669 
   1670 	hpet_acpi_fini();
   1671 
   1672 	/* Send NMI to all CPUs except self to do per processor shutdown */
   1673 	iflag = intr_clear();
   1674 #ifdef	DEBUG
   1675 	APIC_AV_PENDING_SET();
   1676 #else
   1677 	if (apic_mode == LOCAL_APIC)
   1678 		APIC_AV_PENDING_SET();
   1679 #endif /* DEBUG */
   1680 	apic_shutdown_processors = 1;
   1681 	apic_reg_ops->apic_write(APIC_INT_CMD1,
   1682 	    AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);
   1683 
   1684 	/* restore cmos shutdown byte before reboot */
   1685 	if (apic_cmos_ssb_set) {
   1686 		outb(CMOS_ADDR, SSB);
   1687 		outb(CMOS_DATA, 0);
   1688 	}
   1689 
   1690 	ioapic_disable_redirection();
   1691 
   1692 	/*	disable apic mode if imcr present	*/
   1693 	if (apic_imcrp) {
   1694 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
   1695 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
   1696 	}
   1697 
   1698 	apic_disable_local_apic();
   1699 
   1700 	intr_restore(iflag);
   1701 
   1702 	/* remainder of function is for shutdown cases only */
   1703 	if (cmd != A_SHUTDOWN)
   1704 		return;
   1705 
   1706 	/*
   1707 	 * Switch system back into Legacy-Mode if using ACPI and
   1708 	 * not powering-off.  Some BIOSes need to remain in ACPI-mode
   1709 	 * for power-off to succeed (Dell Dimension 4600)
   1710 	 * Do not disable ACPI while doing fastreboot
   1711 	 */
   1712 	if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
   1713 		(void) AcpiDisable();
   1714 
   1715 	if (fcn == AD_FASTREBOOT) {
   1716 		apic_reg_ops->apic_write(APIC_INT_CMD1,
   1717 		    AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
   1718 	}
   1719 
   1720 	/* remainder of function is for shutdown+poweroff case only */
   1721 	if (fcn != AD_POWEROFF)
   1722 		return;
   1723 
   1724 	switch (apic_poweroff_method) {
   1725 		case APIC_POWEROFF_VIA_RTC:
   1726 
   1727 			/* select the extended NVRAM bank in the RTC */
   1728 			outb(CMOS_ADDR, RTC_REGA);
   1729 			byte = inb(CMOS_DATA);
   1730 			outb(CMOS_DATA, (byte | EXT_BANK));
   1731 
   1732 			outb(CMOS_ADDR, PFR_REG);
   1733 
   1734 			/* for Predator must toggle the PAB bit */
   1735 			byte = inb(CMOS_DATA);
   1736 
   1737 			/*
   1738 			 * clear power active bar, wakeup alarm and
   1739 			 * kickstart
   1740 			 */
   1741 			byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
   1742 			outb(CMOS_DATA, byte);
   1743 
   1744 			/* delay before next write */
   1745 			drv_usecwait(1000);
   1746 
   1747 			/* for S40 the following would suffice */
   1748 			byte = inb(CMOS_DATA);
   1749 
   1750 			/* power active bar control bit */
   1751 			byte |= PAB_CBIT;
   1752 			outb(CMOS_DATA, byte);
   1753 
   1754 			break;
   1755 
   1756 		case APIC_POWEROFF_VIA_ASPEN_BMC:
   1757 			restarts = 0;
   1758 restart_aspen_bmc:
   1759 			if (++restarts == 3)
   1760 				break;
   1761 			attempts = 0;
   1762 			do {
   1763 				byte = inb(MISMIC_FLAG_REGISTER);
   1764 				byte &= MISMIC_BUSY_MASK;
   1765 				if (byte != 0) {
   1766 					drv_usecwait(1000);
   1767 					if (attempts >= 3)
   1768 						goto restart_aspen_bmc;
   1769 					++attempts;
   1770 				}
   1771 			} while (byte != 0);
   1772 			outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
   1773 			byte = inb(MISMIC_FLAG_REGISTER);
   1774 			byte |= 0x1;
   1775 			outb(MISMIC_FLAG_REGISTER, byte);
   1776 			i = 0;
   1777 			for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
   1778 			    i++) {
   1779 				attempts = 0;
   1780 				do {
   1781 					byte = inb(MISMIC_FLAG_REGISTER);
   1782 					byte &= MISMIC_BUSY_MASK;
   1783 					if (byte != 0) {
   1784 						drv_usecwait(1000);
   1785 						if (attempts >= 3)
   1786 							goto restart_aspen_bmc;
   1787 						++attempts;
   1788 					}
   1789 				} while (byte != 0);
   1790 				outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
   1791 				outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
   1792 				byte = inb(MISMIC_FLAG_REGISTER);
   1793 				byte |= 0x1;
   1794 				outb(MISMIC_FLAG_REGISTER, byte);
   1795 			}
   1796 			break;
   1797 
   1798 		case APIC_POWEROFF_VIA_SITKA_BMC:
   1799 			restarts = 0;
   1800 restart_sitka_bmc:
   1801 			if (++restarts == 3)
   1802 				break;
   1803 			attempts = 0;
   1804 			do {
   1805 				byte = inb(SMS_STATUS_REGISTER);
   1806 				byte &= SMS_STATE_MASK;
   1807 				if ((byte == SMS_READ_STATE) ||
   1808 				    (byte == SMS_WRITE_STATE)) {
   1809 					drv_usecwait(1000);
   1810 					if (attempts >= 3)
   1811 						goto restart_sitka_bmc;
   1812 					++attempts;
   1813 				}
   1814 			} while ((byte == SMS_READ_STATE) ||
   1815 			    (byte == SMS_WRITE_STATE));
   1816 			outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
   1817 			i = 0;
   1818 			for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
   1819 			    i++) {
   1820 				attempts = 0;
   1821 				do {
   1822 					byte = inb(SMS_STATUS_REGISTER);
   1823 					byte &= SMS_IBF_MASK;
   1824 					if (byte != 0) {
   1825 						drv_usecwait(1000);
   1826 						if (attempts >= 3)
   1827 							goto restart_sitka_bmc;
   1828 						++attempts;
   1829 					}
   1830 				} while (byte != 0);
   1831 				outb(sitka_bmc[i].port, sitka_bmc[i].data);
   1832 			}
   1833 			break;
   1834 
   1835 		case APIC_POWEROFF_NONE:
   1836 
   1837 			/* If no APIC direct method, we will try using ACPI */
   1838 			if (apic_enable_acpi) {
   1839 				if (acpi_poweroff() == 1)
   1840 					return;
   1841 			} else
   1842 				return;
   1843 
   1844 			break;
   1845 	}
   1846 	/*
   1847 	 * Wait a limited time here for power to go off.
   1848 	 * If the power does not go off, then there was a
   1849 	 * problem and we should continue to the halt which
   1850 	 * prints a message for the user to press a key to
   1851 	 * reboot.
   1852 	 */
   1853 	drv_usecwait(7000000); /* wait seven seconds */
   1854 
   1855 }
   1856 
   1857 /*
   1858  * Try and disable all interrupts. We just assign interrupts to other
   1859  * processors based on policy. If any were bound by user request, we
   1860  * let them continue and return failure. We do not bother to check
   1861  * for cache affinity while rebinding.
   1862  */
   1863 
   1864 static int
   1865 apic_disable_intr(processorid_t cpun)
   1866 {
   1867 	int bind_cpu = 0, i, hardbound = 0;
   1868 	apic_irq_t *irq_ptr;
   1869 	ulong_t iflag;
   1870 
   1871 	iflag = intr_clear();
   1872 	lock_set(&apic_ioapic_lock);
   1873 
   1874 	for (i = 0; i <= APIC_MAX_VECTOR; i++) {
   1875 		if (apic_reprogram_info[i].done == B_FALSE) {
   1876 			if (apic_reprogram_info[i].bindcpu == cpun) {
   1877 				/*
   1878 				 * CPU is busy -- it's the target of
   1879 				 * a pending reprogramming attempt
   1880 				 */
   1881 				lock_clear(&apic_ioapic_lock);
   1882 				intr_restore(iflag);
   1883 				return (PSM_FAILURE);
   1884 			}
   1885 		}
   1886 	}
   1887 
   1888 	apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
   1889 
   1890 	apic_cpus[cpun].aci_curipl = 0;
   1891 
   1892 	i = apic_min_device_irq;
   1893 	for (; i <= apic_max_device_irq; i++) {
   1894 		/*
   1895 		 * If there are bound interrupts on this cpu, then
   1896 		 * rebind them to other processors.
   1897 		 */
   1898 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
   1899 			ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) ||
   1900 			    (irq_ptr->airq_temp_cpu == IRQ_UNINIT) ||
   1901 			    ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) <
   1902 			    apic_nproc));
   1903 
   1904 			if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) {
   1905 				hardbound = 1;
   1906 				continue;
   1907 			}
   1908 
   1909 			if (irq_ptr->airq_temp_cpu == cpun) {
   1910 				do {
   1911 					bind_cpu = apic_next_bind_cpu++;
   1912 					if (bind_cpu >= apic_nproc) {
   1913 						apic_next_bind_cpu = 1;
   1914 						bind_cpu = 0;
   1915 
   1916 					}
   1917 				} while (apic_rebind_all(irq_ptr, bind_cpu));
   1918 			}
   1919 		}
   1920 	}
   1921 
   1922 	lock_clear(&apic_ioapic_lock);
   1923 	intr_restore(iflag);
   1924 
   1925 	if (hardbound) {
   1926 		cmn_err(CE_WARN, "Could not disable interrupts on %d"
   1927 		    "due to user bound interrupts", cpun);
   1928 		return (PSM_FAILURE);
   1929 	}
   1930 	else
   1931 		return (PSM_SUCCESS);
   1932 }
   1933 
   1934 /*
   1935  * Bind interrupts to the CPU's local APIC.
   1936  * Interrupts should not be bound to a CPU's local APIC until the CPU
   1937  * is ready to receive interrupts.
   1938  */
   1939 static void
   1940 apic_enable_intr(processorid_t cpun)
   1941 {
   1942 	int	i;
   1943 	apic_irq_t *irq_ptr;
   1944 	ulong_t iflag;
   1945 
   1946 	iflag = intr_clear();
   1947 	lock_set(&apic_ioapic_lock);
   1948 
   1949 	apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
   1950 
   1951 	i = apic_min_device_irq;
   1952 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
   1953 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
   1954 			if ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) == cpun) {
   1955 				(void) apic_rebind_all(irq_ptr,
   1956 				    irq_ptr->airq_cpu);
   1957 			}
   1958 		}
   1959 	}
   1960 
   1961 	lock_clear(&apic_ioapic_lock);
   1962 	intr_restore(iflag);
   1963 }
   1964 
   1965 
   1966 /*
   1967  * This function will reprogram the timer.
   1968  *
   1969  * When in oneshot mode the argument is the absolute time in future to
   1970  * generate the interrupt at.
   1971  *
   1972  * When in periodic mode, the argument is the interval at which the
   1973  * interrupts should be generated. There is no need to support the periodic
   1974  * mode timer change at this time.
   1975  */
   1976 static void
   1977 apic_timer_reprogram(hrtime_t time)
   1978 {
   1979 	hrtime_t now;
   1980 	uint_t ticks;
   1981 	int64_t delta;
   1982 
   1983 	/*
   1984 	 * We should be called from high PIL context (CBE_HIGH_PIL),
   1985 	 * so kpreempt is disabled.
   1986 	 */
   1987 
   1988 	if (!apic_oneshot) {
   1989 		/* time is the interval for periodic mode */
   1990 		ticks = APIC_NSECS_TO_TICKS(time);
   1991 	} else {
   1992 		/* one shot mode */
   1993 
   1994 		now = gethrtime();
   1995 		delta = time - now;
   1996 
   1997 		if (delta <= 0) {
   1998 			/*
   1999 			 * requested to generate an interrupt in the past
   2000 			 * generate an interrupt as soon as possible
   2001 			 */
   2002 			ticks = apic_min_timer_ticks;
   2003 		} else if (delta > apic_nsec_max) {
   2004 			/*
   2005 			 * requested to generate an interrupt at a time
   2006 			 * further than what we are capable of. Set to max
   2007 			 * the hardware can handle
   2008 			 */
   2009 
   2010 			ticks = APIC_MAXVAL;
   2011 #ifdef DEBUG
   2012 			cmn_err(CE_CONT, "apic_timer_reprogram, request at"
   2013 			    "  %lld  too far in future, current time"
   2014 			    "  %lld \n", time, now);
   2015 #endif
   2016 		} else
   2017 			ticks = APIC_NSECS_TO_TICKS(delta);
   2018 	}
   2019 
   2020 	if (ticks < apic_min_timer_ticks)
   2021 		ticks = apic_min_timer_ticks;
   2022 
   2023 	apic_reg_ops->apic_write(APIC_INIT_COUNT, ticks);
   2024 }
   2025 
   2026 /*
   2027  * This function will enable timer interrupts.
   2028  */
   2029 static void
   2030 apic_timer_enable(void)
   2031 {
   2032 	/*
   2033 	 * We should be Called from high PIL context (CBE_HIGH_PIL),
   2034 	 * so kpreempt is disabled.
   2035 	 */
   2036 
   2037 	if (!apic_oneshot) {
   2038 		apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
   2039 		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME);
   2040 	} else {
   2041 		/* one shot */
   2042 		apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
   2043 		    (apic_clkvect + APIC_BASE_VECT));
   2044 	}
   2045 }
   2046 
   2047 /*
   2048  * This function will disable timer interrupts.
   2049  */
   2050 static void
   2051 apic_timer_disable(void)
   2052 {
   2053 	/*
   2054 	 * We should be Called from high PIL context (CBE_HIGH_PIL),
   2055 	 * so kpreempt is disabled.
   2056 	 */
   2057 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
   2058 	    (apic_clkvect + APIC_BASE_VECT) | AV_MASK);
   2059 }
   2060 
   2061 /*
   2062  * Set timer far into the future and return timer
   2063  * current Count in nanoseconds.
   2064  */
   2065 hrtime_t
   2066 apic_timer_stop_count(void)
   2067 {
   2068 	hrtime_t	ns_val;
   2069 	int		enable_val, count_val;
   2070 
   2071 	/*
   2072 	 * Should be called with interrupts disabled.
   2073 	 */
   2074 	ASSERT(!interrupts_enabled());
   2075 
   2076 	enable_val = apic_reg_ops->apic_read(APIC_LOCAL_TIMER);
   2077 	if ((enable_val & AV_MASK) == AV_MASK)
   2078 		return ((hrtime_t)-1);		/* timer is disabled */
   2079 
   2080 	count_val = apic_reg_ops->apic_read(APIC_CURR_COUNT);
   2081 	ns_val = APIC_TICKS_TO_NSECS(count_val);
   2082 
   2083 	apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
   2084 
   2085 	return (ns_val);
   2086 }
   2087 
   2088 /*
   2089  * Reprogram timer after Deep C-State.
   2090  */
   2091 void
   2092 apic_timer_restart(hrtime_t time)
   2093 {
   2094 	apic_timer_reprogram(time);
   2095 }
   2096 
   2097 ddi_periodic_t apic_periodic_id;
   2098 
   2099 /*
   2100  * If this module needs a periodic handler for the interrupt distribution, it
   2101  * can be added here. The argument to the periodic handler is not currently
   2102  * used, but is reserved for future.
   2103  */
   2104 static void
   2105 apic_post_cyclic_setup(void *arg)
   2106 {
   2107 _NOTE(ARGUNUSED(arg))
   2108 	/* cpu_lock is held */
   2109 	/* set up a periodic handler for intr redistribution */
   2110 
   2111 	/*
   2112 	 * In peridoc mode intr redistribution processing is done in
   2113 	 * apic_intr_enter during clk intr processing
   2114 	 */
   2115 	if (!apic_oneshot)
   2116 		return;
   2117 	/*
   2118 	 * Register a periodical handler for the redistribution processing.
   2119 	 * On X86, CY_LOW_LEVEL is mapped to the level 2 interrupt, so
   2120 	 * DDI_IPL_2 should be passed to ddi_periodic_add() here.
   2121 	 */
   2122 	apic_periodic_id = ddi_periodic_add(
   2123 	    (void (*)(void *))apic_redistribute_compute, NULL,
   2124 	    apic_redistribute_sample_interval, DDI_IPL_2);
   2125 }
   2126 
   2127 static void
   2128 apic_redistribute_compute(void)
   2129 {
   2130 	int	i, j, max_busy;
   2131 
   2132 	if (apic_enable_dynamic_migration) {
   2133 		if (++apic_nticks == apic_sample_factor_redistribution) {
   2134 			/*
   2135 			 * Time to call apic_intr_redistribute().
   2136 			 * reset apic_nticks. This will cause max_busy
   2137 			 * to be calculated below and if it is more than
   2138 			 * apic_int_busy, we will do the whole thing
   2139 			 */
   2140 			apic_nticks = 0;
   2141 		}
   2142 		max_busy = 0;
   2143 		for (i = 0; i < apic_nproc; i++) {
   2144 
   2145 			/*
   2146 			 * Check if curipl is non zero & if ISR is in
   2147 			 * progress
   2148 			 */
   2149 			if (((j = apic_cpus[i].aci_curipl) != 0) &&
   2150 			    (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
   2151 
   2152 				int	irq;
   2153 				apic_cpus[i].aci_busy++;
   2154 				irq = apic_cpus[i].aci_current[j];
   2155 				apic_irq_table[irq]->airq_busy++;
   2156 			}
   2157 
   2158 			if (!apic_nticks &&
   2159 			    (apic_cpus[i].aci_busy > max_busy))
   2160 				max_busy = apic_cpus[i].aci_busy;
   2161 		}
   2162 		if (!apic_nticks) {
   2163 			if (max_busy > apic_int_busy_mark) {
   2164 			/*
   2165 			 * We could make the following check be
   2166 			 * skipped > 1 in which case, we get a
   2167 			 * redistribution at half the busy mark (due to
   2168 			 * double interval). Need to be able to collect
   2169 			 * more empirical data to decide if that is a
   2170 			 * good strategy. Punt for now.
   2171 			 */
   2172 				if (apic_skipped_redistribute) {
   2173 					apic_cleanup_busy();
   2174 					apic_skipped_redistribute = 0;
   2175 				} else {
   2176 					apic_intr_redistribute();
   2177 				}
   2178 			} else
   2179 				apic_skipped_redistribute++;
   2180 		}
   2181 	}
   2182 }
   2183 
   2184 
   2185 /*
   2186  * The following functions are in the platform specific file so that they
   2187  * can be different functions depending on whether we are running on
   2188  * bare metal or a hypervisor.
   2189  */
   2190 
   2191 /*
   2192  * map an apic for memory-mapped access
   2193  */
   2194 uint32_t *
   2195 mapin_apic(uint32_t addr, size_t len, int flags)
   2196 {
   2197 	/*LINTED: pointer cast may result in improper alignment */
   2198 	return ((uint32_t *)psm_map_phys(addr, len, flags));
   2199 }
   2200 
   2201 uint32_t *
   2202 mapin_ioapic(uint32_t addr, size_t len, int flags)
   2203 {
   2204 	return (mapin_apic(addr, len, flags));
   2205 }
   2206 
   2207 /*
   2208  * unmap an apic
   2209  */
   2210 void
   2211 mapout_apic(caddr_t addr, size_t len)
   2212 {
   2213 	psm_unmap_phys(addr, len);
   2214 }
   2215 
   2216 void
   2217 mapout_ioapic(caddr_t addr, size_t len)
   2218 {
   2219 	mapout_apic(addr, len);
   2220 }
   2221 
   2222 /*
   2223  * Check to make sure there are enough irq slots
   2224  */
   2225 int
   2226 apic_check_free_irqs(int count)
   2227 {
   2228 	int i, avail;
   2229 
   2230 	avail = 0;
   2231 	for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) {
   2232 		if ((apic_irq_table[i] == NULL) ||
   2233 		    apic_irq_table[i]->airq_mps_intr_index == FREE_INDEX) {
   2234 			if (++avail >= count)
   2235 				return (PSM_SUCCESS);
   2236 		}
   2237 	}
   2238 	return (PSM_FAILURE);
   2239 }
   2240 
   2241 /*
   2242  * This function allocates "count" MSI vector(s) for the given "dip/pri/type"
   2243  */
   2244 int
   2245 apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri,
   2246     int behavior)
   2247 {
   2248 	int	rcount, i;
   2249 	uchar_t	start, irqno;
   2250 	uint32_t cpu;
   2251 	major_t	major;
   2252 	apic_irq_t	*irqptr;
   2253 
   2254 	DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: dip=0x%p "
   2255 	    "inum=0x%x  pri=0x%x count=0x%x behavior=%d\n",
   2256 	    (void *)dip, inum, pri, count, behavior));
   2257 
   2258 	if (count > 1) {
   2259 		if (behavior == DDI_INTR_ALLOC_STRICT &&
   2260 		    apic_multi_msi_enable == 0)
   2261 			return (0);
   2262 		if (apic_multi_msi_enable == 0)
   2263 			count = 1;
   2264 	}
   2265 
   2266 	if ((rcount = apic_navail_vector(dip, pri)) > count)
   2267 		rcount = count;
   2268 	else if (rcount == 0 || (rcount < count &&
   2269 	    behavior == DDI_INTR_ALLOC_STRICT))
   2270 		return (0);
   2271 
   2272 	/* if not ISP2, then round it down */
   2273 	if (!ISP2(rcount))
   2274 		rcount = 1 << (highbit(rcount) - 1);
   2275 
   2276 	mutex_enter(&airq_mutex);
   2277 
   2278 	for (start = 0; rcount > 0; rcount >>= 1) {
   2279 		if ((start = apic_find_multi_vectors(pri, rcount)) != 0 ||
   2280 		    behavior == DDI_INTR_ALLOC_STRICT)
   2281 			break;
   2282 	}
   2283 
   2284 	if (start == 0) {
   2285 		/* no vector available */
   2286 		mutex_exit(&airq_mutex);
   2287 		return (0);
   2288 	}
   2289 
   2290 	if (apic_check_free_irqs(rcount) == PSM_FAILURE) {
   2291 		/* not enough free irq slots available */
   2292 		mutex_exit(&airq_mutex);
   2293 		return (0);
   2294 	}
   2295 
   2296 	major = (dip != NULL) ? ddi_driver_major(dip) : 0;
   2297 	for (i = 0; i < rcount; i++) {
   2298 		if ((irqno = apic_allocate_irq(apic_first_avail_irq)) ==
   2299 		    (uchar_t)-1) {
   2300 			/*
   2301 			 * shouldn't happen because of the
   2302 			 * apic_check_free_irqs() check earlier
   2303 			 */
   2304 			mutex_exit(&airq_mutex);
   2305 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
   2306 			    "apic_allocate_irq failed\n"));
   2307 			return (i);
   2308 		}
   2309 		apic_max_device_irq = max(irqno, apic_max_device_irq);
   2310 		apic_min_device_irq = min(irqno, apic_min_device_irq);
   2311 		irqptr = apic_irq_table[irqno];
   2312 #ifdef	DEBUG
   2313 		if (apic_vector_to_irq[start + i] != APIC_RESV_IRQ)
   2314 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
   2315 			    "apic_vector_to_irq is not APIC_RESV_IRQ\n"));
   2316 #endif
   2317 		apic_vector_to_irq[start + i] = (uchar_t)irqno;
   2318 
   2319 		irqptr->airq_vector = (uchar_t)(start + i);
   2320 		irqptr->airq_ioapicindex = (uchar_t)inum;	/* start */
   2321 		irqptr->airq_intin_no = (uchar_t)rcount;
   2322 		irqptr->airq_ipl = pri;
   2323 		irqptr->airq_vector = start + i;
   2324 		irqptr->airq_origirq = (uchar_t)(inum + i);
   2325 		irqptr->airq_share_id = 0;
   2326 		irqptr->airq_mps_intr_index = MSI_INDEX;
   2327 		irqptr->airq_dip = dip;
   2328 		irqptr->airq_major = major;
   2329 		if (i == 0) /* they all bound to the same cpu */
   2330 			cpu = irqptr->airq_cpu = apic_bind_intr(dip, irqno,
   2331 			    0xff, 0xff);
   2332 		else
   2333 			irqptr->airq_cpu = cpu;
   2334 		DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: irq=0x%x "
   2335 		    "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno,
   2336 		    (void *)irqptr->airq_dip, irqptr->airq_vector,
   2337 		    irqptr->airq_origirq, pri));
   2338 	}
   2339 	mutex_exit(&airq_mutex);
   2340 	return (rcount);
   2341 }
   2342 
   2343 /*
   2344  * This function allocates "count" MSI-X vector(s) for the given "dip/pri/type"
   2345  */
   2346 int
   2347 apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri,
   2348     int behavior)
   2349 {
   2350 	int	rcount, i;
   2351 	major_t	major;
   2352 
   2353 	mutex_enter(&airq_mutex);
   2354 
   2355 	if ((rcount = apic_navail_vector(dip, pri)) > count)
   2356 		rcount = count;
   2357 	else if (rcount == 0 || (rcount < count &&
   2358 	    behavior == DDI_INTR_ALLOC_STRICT)) {
   2359 		rcount = 0;
   2360 		goto out;
   2361 	}
   2362 
   2363 	if (apic_check_free_irqs(rcount) == PSM_FAILURE) {
   2364 		/* not enough free irq slots available */
   2365 		rcount = 0;
   2366 		goto out;
   2367 	}
   2368 
   2369 	major = (dip != NULL) ? ddi_driver_major(dip) : 0;
   2370 	for (i = 0; i < rcount; i++) {
   2371 		uchar_t	vector, irqno;
   2372 		apic_irq_t	*irqptr;
   2373 
   2374 		if ((irqno = apic_allocate_irq(apic_first_avail_irq)) ==
   2375 		    (uchar_t)-1) {
   2376 			/*
   2377 			 * shouldn't happen because of the
   2378 			 * apic_check_free_irqs() check earlier
   2379 			 */
   2380 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msix_vectors: "
   2381 			    "apic_allocate_irq failed\n"));
   2382 			rcount = i;
   2383 			goto out;
   2384 		}
   2385 		if ((vector = apic_allocate_vector(pri, irqno, 1)) == 0) {
   2386 			/*
   2387 			 * shouldn't happen because of the
   2388 			 * apic_navail_vector() call earlier
   2389 			 */
   2390 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msix_vectors: "
   2391 			    "apic_allocate_vector failed\n"));
   2392 			rcount = i;
   2393 			goto out;
   2394 		}
   2395 		apic_max_device_irq = max(irqno, apic_max_device_irq);
   2396 		apic_min_device_irq = min(irqno, apic_min_device_irq);
   2397 		irqptr = apic_irq_table[irqno];
   2398 		irqptr->airq_vector = (uchar_t)vector;
   2399 		irqptr->airq_ipl = pri;
   2400 		irqptr->airq_origirq = (uchar_t)(inum + i);
   2401 		irqptr->airq_share_id = 0;
   2402 		irqptr->airq_mps_intr_index = MSIX_INDEX;
   2403 		irqptr->airq_dip = dip;
   2404 		irqptr->airq_major = major;
   2405 		irqptr->airq_cpu = apic_bind_intr(dip, irqno, 0xff, 0xff);
   2406 	}
   2407 out:
   2408 	mutex_exit(&airq_mutex);
   2409 	return (rcount);
   2410 }
   2411 
   2412 /*
   2413  * Allocate a free vector for irq at ipl. Takes care of merging of multiple
   2414  * IPLs into a single APIC level as well as stretching some IPLs onto multiple
   2415  * levels. APIC_HI_PRI_VECTS interrupts are reserved for high priority
   2416  * requests and allocated only when pri is set.
   2417  */
   2418 uchar_t
   2419 apic_allocate_vector(int ipl, int irq, int pri)
   2420 {
   2421 	int	lowest, highest, i;
   2422 
   2423 	highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK;
   2424 	lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL;
   2425 
   2426 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
   2427 		lowest -= APIC_VECTOR_PER_IPL;
   2428 
   2429 #ifdef	DEBUG
   2430 	if (apic_restrict_vector)	/* for testing shared interrupt logic */
   2431 		highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS;
   2432 #endif /* DEBUG */
   2433 	if (pri == 0)
   2434 		highest -= APIC_HI_PRI_VECTS;
   2435 
   2436 	for (i = lowest; i <= highest; i++) {
   2437 		if (APIC_CHECK_RESERVE_VECTORS(i))
   2438 			continue;
   2439 		if (apic_vector_to_irq[i] == APIC_RESV_IRQ) {
   2440 			apic_vector_to_irq[i] = (uchar_t)irq;
   2441 			return (i);
   2442 		}
   2443 	}
   2444 
   2445 	return (0);
   2446 }
   2447 
   2448 /* Mark vector as not being used by any irq */
   2449 void
   2450 apic_free_vector(uchar_t vector)
   2451 {
   2452 	apic_vector_to_irq[vector] = APIC_RESV_IRQ;
   2453 }
   2454 
   2455 uint32_t
   2456 ioapic_read(int ioapic_ix, uint32_t reg)
   2457 {
   2458 	volatile uint32_t *ioapic;
   2459 
   2460 	ioapic = apicioadr[ioapic_ix];
   2461 	ioapic[APIC_IO_REG] = reg;
   2462 	return (ioapic[APIC_IO_DATA]);
   2463 }
   2464 
   2465 void
   2466 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
   2467 {
   2468 	volatile uint32_t *ioapic;
   2469 
   2470 	ioapic = apicioadr[ioapic_ix];
   2471 	ioapic[APIC_IO_REG] = reg;
   2472 	ioapic[APIC_IO_DATA] = value;
   2473 }
   2474 
   2475 void
   2476 ioapic_write_eoi(int ioapic_ix, uint32_t value)
   2477 {
   2478 	volatile uint32_t *ioapic;
   2479 
   2480 	ioapic = apicioadr[ioapic_ix];
   2481 	ioapic[APIC_IO_EOI] = value;
   2482 }
   2483 
   2484 static processorid_t
   2485 apic_find_cpu(int flag)
   2486 {
   2487 	processorid_t acid = 0;
   2488 	int i;
   2489 
   2490 	/* Find the first CPU with the passed-in flag set */
   2491 	for (i = 0; i < apic_nproc; i++) {
   2492 		if (apic_cpus[i].aci_status & flag) {
   2493 			acid = i;
   2494 			break;
   2495 		}
   2496 	}
   2497 
   2498 	ASSERT((apic_cpus[acid].aci_status & flag) != 0);
   2499 	return (acid);
   2500 }
   2501 
   2502 /*
   2503  * Call rebind to do the actual programming.
   2504  * Must be called with interrupts disabled and apic_ioapic_lock held
   2505  * 'p' is polymorphic -- if this function is called to process a deferred
   2506  * reprogramming, p is of type 'struct ioapic_reprogram_data *', from which
   2507  * the irq pointer is retrieved.  If not doing deferred reprogramming,
   2508  * p is of the type 'apic_irq_t *'.
   2509  *
   2510  * apic_ioapic_lock must be held across this call, as it protects apic_rebind
   2511  * and it protects apic_find_cpu() from a race in which a CPU can be taken
   2512  * offline after a cpu is selected, but before apic_rebind is called to
   2513  * bind interrupts to it.
   2514  */
   2515 int
   2516 apic_setup_io_intr(void *p, int irq, boolean_t deferred)
   2517 {
   2518 	apic_irq_t *irqptr;
   2519 	struct ioapic_reprogram_data *drep = NULL;
   2520 	int rv;
   2521 
   2522 	if (deferred) {
   2523 		drep = (struct ioapic_reprogram_data *)p;
   2524 		ASSERT(drep != NULL);
   2525 		irqptr = drep->irqp;
   2526 	} else
   2527 		irqptr = (apic_irq_t *)p;
   2528 
   2529 	ASSERT(irqptr != NULL);
   2530 
   2531 	rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, drep);
   2532 	if (rv) {
   2533 		/*
   2534 		 * CPU is not up or interrupts are disabled. Fall back to
   2535 		 * the first available CPU
   2536 		 */
   2537 		rv = apic_rebind(irqptr, apic_find_cpu(APIC_CPU_INTR_ENABLE),
   2538 		    drep);
   2539 	}
   2540 
   2541 	return (rv);
   2542 }
   2543 
   2544 
   2545 uchar_t
   2546 apic_modify_vector(uchar_t vector, int irq)
   2547 {
   2548 	apic_vector_to_irq[vector] = (uchar_t)irq;
   2549 	return (vector);
   2550 }
   2551 
   2552 char *
   2553 apic_get_apic_type()
   2554 {
   2555 	return (apic_psm_info.p_mach_idstring);
   2556 }
   2557 
   2558 void
   2559 x2apic_update_psm()
   2560 {
   2561 	struct psm_ops *pops = &apic_ops;
   2562 
   2563 	ASSERT(pops != NULL);
   2564 
   2565 	/*
   2566 	 * We don't need to do any magic if one of the following
   2567 	 * conditions is true :
   2568 	 * - Not being run under kernel debugger.
   2569 	 * - MP is not set.
   2570 	 * - Booted with one CPU only.
   2571 	 * - One CPU configured.
   2572 	 *
   2573 	 * We set apic_common_send_ipi() since kernel debuggers
   2574 	 * attempt to send IPIs to other slave CPUs during
   2575 	 * entry (exit) from (to) debugger.
   2576 	 */
   2577 	if (!(boothowto & RB_DEBUG) || use_mp == 0 ||
   2578 	    apic_nproc == 1 || boot_ncpus == 1) {
   2579 		pops->psm_send_ipi =  x2apic_send_ipi;
   2580 	} else {
   2581 		pops->psm_send_ipi =  apic_common_send_ipi;
   2582 	}
   2583 
   2584 	pops->psm_intr_exit = x2apic_intr_exit;
   2585 	pops->psm_setspl = x2apic_setspl;
   2586 
   2587 	send_dirintf = pops->psm_send_ipi;
   2588 
   2589 	apic_mode = LOCAL_X2APIC;
   2590 	apic_change_ops();
   2591 }
   2592 
   2593 static void
   2594 apic_intrmap_init(int apic_mode)
   2595 {
   2596 	int suppress_brdcst_eoi = 0;
   2597 
   2598 	if (psm_vt_ops != NULL) {
   2599 		/*
   2600 		 * Since X2APIC requires the use of interrupt remapping
   2601 		 * (though this is not documented explicitly in the Intel
   2602 		 * documentation (yet)), initialize interrupt remapping
   2603 		 * support before initializing the X2APIC unit.
   2604 		 */
   2605 		if (((apic_intrmap_ops_t *)psm_vt_ops)->
   2606 		    apic_intrmap_init(apic_mode) == DDI_SUCCESS) {
   2607 
   2608 			apic_vt_ops = psm_vt_ops;
   2609 
   2610 			/*
   2611 			 * We leverage the interrupt remapping engine to
   2612 			 * suppress broadcast EOI; thus we must send the
   2613 			 * directed EOI with the directed-EOI handler.
   2614 			 */
   2615 			if (apic_directed_EOI_supported() == 0) {
   2616 				suppress_brdcst_eoi = 1;
   2617 			}
   2618 
   2619 			apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi);
   2620 
   2621 			if (apic_detect_x2apic()) {
   2622 				apic_enable_x2apic();
   2623 			}
   2624 
   2625 			if (apic_directed_EOI_supported() == 0) {
   2626 				apic_set_directed_EOI_handler();
   2627 			}
   2628 		}
   2629 	}
   2630 }
   2631 
   2632 /*ARGSUSED*/
   2633 static void
   2634 apic_record_ioapic_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt)
   2635 {
   2636 	irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
   2637 }
   2638 
   2639 /*ARGSUSED*/
   2640 static void
   2641 apic_record_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs)
   2642 {
   2643 	mregs->mr_addr = MSI_ADDR_HDR |
   2644 	    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
   2645 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
   2646 	    (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
   2647 	mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
   2648 	    mregs->mr_data;
   2649 }
   2650