OpenGrok

Cross Reference: apic.c
xref: /onnv/onnv-gate/usr/src/uts/i86pc/io/pcplusmp/apic.c
Home | History | Annotate | Line # | Download | only in pcplusmp
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
     24  */
     25 /*
     26  * Copyright (c) 2010, Intel Corporation.
     27  * All rights reserved.
     28  */
     29 
     30 
     31 /*
     32  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
     33  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
     34  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
     35  * PSMI 1.5 extensions are supported in Solaris Nevada.
     36  * PSMI 1.6 extensions are supported in Solaris Nevada.
     37  * PSMI 1.7 extensions are supported in Solaris Nevada.
     38  */
     39 #define	PSMI_1_7
     40 
     41 #include <sys/processor.h>
     42 #include <sys/time.h>
     43 #include <sys/psm.h>
     44 #include <sys/smp_impldefs.h>
     45 #include <sys/cram.h>
     46 #include <sys/acpi/acpi.h>
     47 #include <sys/acpica.h>
     48 #include <sys/psm_common.h>
     49 #include <sys/apic.h>
     50 #include <sys/pit.h>
     51 #include <sys/ddi.h>
     52 #include <sys/sunddi.h>
     53 #include <sys/ddi_impldefs.h>
     54 #include <sys/pci.h>
     55 #include <sys/promif.h>
     56 #include <sys/x86_archext.h>
     57 #include <sys/cpc_impl.h>
     58 #include <sys/uadmin.h>
     59 #include <sys/panic.h>
     60 #include <sys/debug.h>
     61 #include <sys/archsystm.h>
     62 #include <sys/trap.h>
     63 #include <sys/machsystm.h>
     64 #include <sys/sysmacros.h>
     65 #include <sys/cpuvar.h>
     66 #include <sys/rm_platter.h>
     67 #include <sys/privregs.h>
     68 #include <sys/note.h>
     69 #include <sys/pci_intr_lib.h>
     70 #include <sys/spl.h>
     71 #include <sys/clock.h>
     72 #include <sys/dditypes.h>
     73 #include <sys/sunddi.h>
     74 #include <sys/x_call.h>
     75 #include <sys/reboot.h>
     76 #include <sys/hpet.h>
     77 #include <sys/apic_common.h>
     78 #include <sys/apic_timer.h>
     79 
     80 /*
     81  *	Local Function Prototypes
     82  */
     83 static void apic_init_intr(void);
     84 
     85 /*
     86  *	standard MP entries
     87  */
     88 static int	apic_probe(void);
     89 static int	apic_getclkirq(int ipl);
     90 static void	apic_init(void);
     91 static void	apic_picinit(void);
     92 static int	apic_post_cpu_start(void);
     93 static int	apic_intr_enter(int ipl, int *vect);
     94 static void	apic_setspl(int ipl);
     95 static void	x2apic_setspl(int ipl);
     96 static int	apic_addspl(int ipl, int vector, int min_ipl, int max_ipl);
     97 static int	apic_delspl(int ipl, int vector, int min_ipl, int max_ipl);
     98 static int	apic_disable_intr(processorid_t cpun);
     99 static void	apic_enable_intr(processorid_t cpun);
    100 static int		apic_get_ipivect(int ipl, int type);
    101 static void	apic_post_cyclic_setup(void *arg);
    102 
    103 /*
    104  * The following vector assignments influence the value of ipltopri and
    105  * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program
    106  * idle to 0 and IPL 0 to 0xf to differentiate idle in case
    107  * we care to do so in future. Note some IPLs which are rarely used
    108  * will share the vector ranges and heavily used IPLs (5 and 6) have
    109  * a wide range.
    110  *
    111  * This array is used to initialize apic_ipls[] (in apic_init()).
    112  *
    113  *	IPL		Vector range.		as passed to intr_enter
    114  *	0		none.
    115  *	1,2,3		0x20-0x2f		0x0-0xf
    116  *	4		0x30-0x3f		0x10-0x1f
    117  *	5		0x40-0x5f		0x20-0x3f
    118  *	6		0x60-0x7f		0x40-0x5f
    119  *	7,8,9		0x80-0x8f		0x60-0x6f
    120  *	10		0x90-0x9f		0x70-0x7f
    121  *	11		0xa0-0xaf		0x80-0x8f
    122  *	...		...
    123  *	15		0xe0-0xef		0xc0-0xcf
    124  *	15		0xf0-0xff		0xd0-0xdf
    125  */
/*
 * One entry per group of APIC_VECTOR_PER_IPL vectors.  apic_init() walks
 * this table to build apic_ipltopri[]; runs of equal values mark IPLs that
 * own more than one vector group.
 */
uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15
};
    129 	/*
    130 	 * The ipl of an ISR at vector X is apic_vectortoipl[X>>4]
    131 	 * NOTE that this is vector as passed into intr_enter which is
    132 	 * programmed vector - 0x20 (APIC_BASE_VECT)
    133 	 */
    134 
/* Filled in by apic_init(); indexed by unix IPL (0..MAXIPL). */
uchar_t	apic_ipltopri[MAXIPL + 1];	/* unix ipl to apic pri	*/
	/* The taskpri to be programmed into apic to mask given ipl */

#if defined(__amd64)
/* apic_init() sets each entry to apic_ipltopri[ipl] >> APIC_IPL_SHIFT. */
uchar_t	apic_cr8pri[MAXIPL + 1];	/* unix ipl to cr8 pri	*/
#endif

/*
 * Correlation of the hardware vector to the IPL in use, initialized
 * from apic_vectortoipl[] in apic_init().  The final IPLs may not correlate
 * to the IPLs in apic_vectortoipl on some systems that share interrupt lines
 * connected to errata-stricken IOAPICs
 *
 * apic_intr_enter() indexes this array with the vector as passed in by its
 * caller (i.e. already reduced by APIC_BASE_VECT).
 */
uchar_t apic_ipls[APIC_AVAIL_VECTOR];
    149 
    150 /*
    151  * Patchable global variables.
    152  */
/*
 * Both tunables are plain ints with static initial values; neither is read
 * in the part of the file reviewed here.
 */
int	apic_enable_hwsoftint = 0;	/* 0 - disable, 1 - enable	*/
int	apic_enable_bind_log = 1;	/* 1 - display interrupt binding log */
    155 
    156 /*
    157  *	Local static data
    158  */
/*
 * PSM operations vector for pcplusmp.
 *
 * The initializer is positional, so the order of entries must match the
 * member order of struct psm_ops (declared outside this file).  Unused slots
 * are NULL with the slot name in a trailing comment.  _init() may overwrite
 * the psm_gethrtime member at load time.
 */
static struct	psm_ops apic_ops = {
	apic_probe,

	apic_init,
	apic_picinit,
	apic_intr_enter,
	apic_intr_exit,
	apic_setspl,
	apic_addspl,
	apic_delspl,
	apic_disable_intr,
	apic_enable_intr,
	(int (*)(int))NULL,		/* psm_softlvl_to_irq */
	(void (*)(int))NULL,		/* psm_set_softintr */

	apic_set_idlecpu,
	apic_unset_idlecpu,

	apic_clkinit,
	apic_getclkirq,
	(void (*)(void))NULL,		/* psm_hrtimeinit */
	apic_gethrtime,

	apic_get_next_processorid,
	apic_cpu_start,
	apic_post_cpu_start,
	apic_shutdown,
	apic_get_ipivect,
	apic_send_ipi,

	(int (*)(dev_info_t *, int))NULL,	/* psm_translate_irq */
	(void (*)(int, char *))NULL,	/* psm_notify_error */
	(void (*)(int))NULL,		/* psm_notify_func */
	apic_timer_reprogram,
	apic_timer_enable,
	apic_timer_disable,
	apic_post_cyclic_setup,
	apic_preshutdown,
	apic_intr_ops,			/* Advanced DDI Interrupt framework */
	apic_state,			/* save, restore apic state for S3 */
	apic_cpu_ops,			/* CPU control interface. */
};

/* Non-static alias of the ops vector above. */
struct psm_ops *psmops = &apic_ops;
    203 
/*
 * Module description handed to psm_mod_init()/psm_mod_fini()/psm_mod_info().
 * apic_probe() passes the p_mach_idstring member to apic_probe_common().
 */
static struct	psm_info apic_psm_info = {
	PSM_INFO_VER01_7,			/* version */
	PSM_OWN_EXCLUSIVE,			/* ownership */
	(struct psm_ops *)&apic_ops,		/* operation */
	APIC_PCPLUSMP_NAME,			/* machine name */
	"pcplusmp v1.4 compatible",
};

/* Opaque handle passed by address to the psm_mod_*() routines. */
static void *apic_hdlp;
    213 
    214 /*
    215  * apic_let_idle_redistribute can have the following values:
    216  * 0 - If clock decremented it from 1 to 0, clock has to call redistribute.
    217  * apic_redistribute_lock prevents multiple idle cpus from redistributing
    218  */
int	apic_num_idle_redistributions = 0;
static	int apic_let_idle_redistribute = 0;

/* to gather intr data and redistribute */
/* Called from the clock path of apic_intr_enter() when !apic_oneshot. */
static void apic_redistribute_compute(void);
    224 
    225 /*
    226  *	This is the loadable module wrapper
    227  */
    228 
    229 int
    230 _init(void)
    231 {
    232 	if (apic_coarse_hrtime)
    233 		apic_ops.psm_gethrtime = &apic_gettime;
    234 	return (psm_mod_init(&apic_hdlp, &apic_psm_info));
    235 }
    236 
    237 int
    238 _fini(void)
    239 {
    240 	return (psm_mod_fini(&apic_hdlp, &apic_psm_info));
    241 }
    242 
    243 int
    244 _info(struct modinfo *modinfop)
    245 {
    246 	return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop));
    247 }
    248 
    249 static int
    250 apic_probe(void)
    251 {
    252 	/* check if apix is initialized */
    253 	if (apix_enable && apix_loaded())
    254 		return (PSM_FAILURE);
    255 	else
    256 		apix_enable = 0; /* continue using pcplusmp PSM */
    257 
    258 	return (apic_probe_common(apic_psm_info.p_mach_idstring));
    259 }
    260 
    261 static uchar_t
    262 apic_xlate_vector_by_irq(uchar_t irq)
    263 {
    264 	if (apic_irq_table[irq] == NULL)
    265 		return (0);
    266 
    267 	return (apic_irq_table[irq]->airq_vector);
    268 }
    269 
    270 void
    271 apic_init(void)
    272 {
    273 	int i;
    274 	int	j = 1;
    275 
    276 	psm_get_ioapicid = apic_get_ioapicid;
    277 	psm_get_localapicid = apic_get_localapicid;
    278 	psm_xlate_vector_by_irq = apic_xlate_vector_by_irq;
    279 
    280 	apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */
    281 	for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) {
    282 		if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) &&
    283 		    (apic_vectortoipl[i + 1] == apic_vectortoipl[i]))
    284 			/* get to highest vector at the same ipl */
    285 			continue;
    286 		for (; j <= apic_vectortoipl[i]; j++) {
    287 			apic_ipltopri[j] = (i << APIC_IPL_SHIFT) +
    288 			    APIC_BASE_VECT;
    289 		}
    290 	}
    291 	for (; j < MAXIPL + 1; j++)
    292 		/* fill up any empty ipltopri slots */
    293 		apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT;
    294 	apic_init_common();
    295 #if defined(__amd64)
    296 	/*
    297 	 * Make cpu-specific interrupt info point to cr8pri vector
    298 	 */
    299 	for (i = 0; i <= MAXIPL; i++)
    300 		apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT;
    301 	CPU->cpu_pri_data = apic_cr8pri;
    302 #else
    303 	if (cpuid_have_cr8access(CPU))
    304 		apic_have_32bit_cr8 = 1;
    305 #endif	/* __amd64 */
    306 }
    307 
/*
 * Program the local APIC of the calling CPU.
 *
 * Called from apic_picinit() and apic_post_cpu_start().  In order: mask
 * everything via the task register, set the destination format/ID (MMIO
 * mode only), enable the APIC through the spurious-vector register, then
 * set up the LVT entries (timer, LINT0/1, perf counter, thermal, error) and
 * finally CMCI.  The perf-counter, error and CMCI vectors are allocated the
 * first time through and reused on later calls.
 *
 * NOTE(review): the result of apic_get_ipivect() is only checked with
 * ASSERT() before it is used to index apic_irq_table[]; whether that check
 * exists in non-DEBUG builds depends on the ASSERT definition -- confirm.
 */
static void
apic_init_intr(void)
{
	processorid_t	cpun = psm_get_cpu_id();
	uint_t nlvt;
	uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;

	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);

	if (apic_mode == LOCAL_APIC) {
		/*
		 * We are running APIC in MMIO mode.
		 */
		if (apic_flat_model) {
			apic_reg_ops->apic_write(APIC_FORMAT_REG,
			    APIC_FLAT_MODEL);
		} else {
			apic_reg_ops->apic_write(APIC_FORMAT_REG,
			    APIC_CLUSTER_MODEL);
		}

		apic_reg_ops->apic_write(APIC_DEST_REG,
		    AV_HIGH_ORDER >> cpun);
	}

	if (apic_directed_EOI_supported()) {
		/*
		 * Setting the 12th bit in the Spurious Interrupt Vector
		 * Register suppresses broadcast EOIs generated by the local
		 * APIC. The suppression of broadcast EOIs happens only when
		 * interrupts are level-triggered.
		 */
		svr |= APIC_SVR_SUPPRESS_BROADCAST_EOI;
	}

	/* need to enable APIC before unmasking NMI */
	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, svr);

	/*
	 * Presence of an invalid vector with delivery mode AV_FIXED can
	 * cause an error interrupt, even if the entry is masked...so
	 * write a valid vector to LVT entries along with the mask bit
	 */

	/* All APICs have timer and LINT0/1 */
	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK|APIC_RESV_IRQ);
	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK|APIC_RESV_IRQ);
	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_NMI);	/* enable NMI */

	/*
	 * On integrated APICs, the number of LVT entries is
	 * 'Max LVT entry' + 1; on 82489DX's (non-integrated
	 * APICs), nlvt is "3" (LINT0, LINT1, and timer)
	 */

	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
		nlvt = 3;
	} else {
		nlvt = ((apic_reg_ops->apic_read(APIC_VERS_REG) >> 16) &
		    0xFF) + 1;
	}

	if (nlvt >= 5) {
		/* Enable performance counter overflow interrupt */

		if (!is_x86_feature(x86_featureset, X86FSET_MSR))
			apic_enable_cpcovf_intr = 0;
		if (apic_enable_cpcovf_intr) {
			/* first caller allocates the vector and handler */
			if (apic_cpcovf_vect == 0) {
				int ipl = APIC_PCINT_IPL;
				int irq = apic_get_ipivect(ipl, -1);

				ASSERT(irq != -1);
				apic_cpcovf_vect =
				    apic_irq_table[irq]->airq_vector;
				ASSERT(apic_cpcovf_vect);
				(void) add_avintr(NULL, ipl,
				    (avfunc)kcpc_hw_overflow_intr,
				    "apic pcint", irq, NULL, NULL, NULL, NULL);
				kcpc_hw_overflow_intr_installed = 1;
				kcpc_hw_enable_cpc_intr =
				    apic_cpcovf_mask_clear;
			}
			apic_reg_ops->apic_write(APIC_PCINT_VECT,
			    apic_cpcovf_vect);
		}
	}

	if (nlvt >= 6) {
		/* Only mask TM intr if the BIOS apparently doesn't use it */

		uint32_t lvtval;

		lvtval = apic_reg_ops->apic_read(APIC_THERM_VECT);
		/* leave an unmasked SMI-mode entry untouched */
		if (((lvtval & AV_MASK) == AV_MASK) ||
		    ((lvtval & AV_DELIV_MODE) != AV_SMI)) {
			apic_reg_ops->apic_write(APIC_THERM_VECT,
			    AV_MASK|APIC_RESV_IRQ);
		}
	}

	/* Enable error interrupt */

	if (nlvt >= 4 && apic_enable_error_intr) {
		/* first caller allocates the vector and handler */
		if (apic_errvect == 0) {
			int ipl = 0xf;	/* get highest priority intr */
			int irq = apic_get_ipivect(ipl, -1);

			ASSERT(irq != -1);
			apic_errvect = apic_irq_table[irq]->airq_vector;
			ASSERT(apic_errvect);
			/*
			 * Not PSMI compliant, but we are going to merge
			 * with ON anyway
			 */
			(void) add_avintr((void *)NULL, ipl,
			    (avfunc)apic_error_intr, "apic error intr",
			    irq, NULL, NULL, NULL, NULL);
		}
		apic_reg_ops->apic_write(APIC_ERR_VECT, apic_errvect);
		/*
		 * NOTE(review): the error status register is deliberately
		 * written twice in a row; presumably a hardware requirement
		 * for clearing it -- confirm before "simplifying".
		 */
		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
	}

	/* Enable CMCI interrupt */
	if (cmi_enable_cmci) {

		/* register the cpu setup callback once, under both locks */
		mutex_enter(&cmci_cpu_setup_lock);
		if (cmci_cpu_setup_registered == 0) {
			mutex_enter(&cpu_lock);
			register_cpu_setup_func(cmci_cpu_setup, NULL);
			mutex_exit(&cpu_lock);
			cmci_cpu_setup_registered = 1;
		}
		mutex_exit(&cmci_cpu_setup_lock);

		/* first caller allocates the vector and handler */
		if (apic_cmci_vect == 0) {
			int ipl = 0x2;
			int irq = apic_get_ipivect(ipl, -1);

			ASSERT(irq != -1);
			apic_cmci_vect = apic_irq_table[irq]->airq_vector;
			ASSERT(apic_cmci_vect);

			(void) add_avintr(NULL, ipl,
			    (avfunc)cmi_cmci_trap,
			    "apic cmci intr", irq, NULL, NULL, NULL, NULL);
		}
		apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
	}
}
    459 
    460 static void
    461 apic_picinit(void)
    462 {
    463 	int i, j;
    464 	uint_t isr;
    465 
    466 	/*
    467 	 * Initialize and enable interrupt remapping before apic
    468 	 * hardware initialization
    469 	 */
    470 	apic_intrmap_init(apic_mode);
    471 
    472 	/*
    473 	 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
    474 	 * bit on without clearing it with EOI.  Since softint
    475 	 * uses vector 0x20 to interrupt itself, so softint will
    476 	 * not work on this machine.  In order to fix this problem
    477 	 * a check is made to verify all the isr bits are clear.
    478 	 * If not, EOIs are issued to clear the bits.
    479 	 */
    480 	for (i = 7; i >= 1; i--) {
    481 		isr = apic_reg_ops->apic_read(APIC_ISR_REG + (i * 4));
    482 		if (isr != 0)
    483 			for (j = 0; ((j < 32) && (isr != 0)); j++)
    484 				if (isr & (1 << j)) {
    485 					apic_reg_ops->apic_write(
    486 					    APIC_EOI_REG, 0);
    487 					isr &= ~(1 << j);
    488 					apic_error |= APIC_ERR_BOOT_EOI;
    489 				}
    490 	}
    491 
    492 	/* set a flag so we know we have run apic_picinit() */
    493 	apic_picinit_called = 1;
    494 	LOCK_INIT_CLEAR(&apic_gethrtime_lock);
    495 	LOCK_INIT_CLEAR(&apic_ioapic_lock);
    496 	LOCK_INIT_CLEAR(&apic_error_lock);
    497 	LOCK_INIT_CLEAR(&apic_mode_switch_lock);
    498 
    499 	picsetup();	 /* initialise the 8259 */
    500 
    501 	/* add nmi handler - least priority nmi handler */
    502 	LOCK_INIT_CLEAR(&apic_nmi_lock);
    503 
    504 	if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
    505 	    "pcplusmp NMI handler", (caddr_t)NULL))
    506 		cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler");
    507 
    508 	/*
    509 	 * Check for directed-EOI capability in the local APIC.
    510 	 */
    511 	if (apic_directed_EOI_supported() == 1) {
    512 		apic_set_directed_EOI_handler();
    513 	}
    514 
    515 	apic_init_intr();
    516 
    517 	/* enable apic mode if imcr present */
    518 	if (apic_imcrp) {
    519 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
    520 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
    521 	}
    522 
    523 	ioapic_init_intr(IOAPIC_MASK);
    524 }
    525 
#ifdef	DEBUG
/*
 * Intentionally empty.  apic_intr_enter() calls this when the current CPU
 * id equals apic_break_on_cpu, which gives a debugger a fixed place to stop.
 */
void
apic_break(void)
{
}
#endif /* DEBUG */
    532 
    533 /*
    534  * platform_intr_enter
    535  *
    536  *	Called at the beginning of the interrupt service routine to
    537  *	mask all level equal to and below the interrupt priority
    538  *	of the interrupting vector.  An EOI should be given to
    539  *	the interrupt controller to enable other HW interrupts.
    540  *
    541  *	Return -1 for spurious interrupts
    542  *
    543  */
/*ARGSUSED*/
static int
apic_intr_enter(int ipl, int *vectorp)
{
	uchar_t vector;
	int nipl;
	int irq;
	ulong_t iflag;
	apic_cpus_info_t *cpu_infop;

	/*
	 * The real vector delivered is (*vectorp + 0x20), but our caller
	 * subtracts 0x20 from the vector before passing it to us.
	 * (That's why APIC_BASE_VECT is 0x20.)
	 */
	vector = (uchar_t)*vectorp;

	/* if interrupted by the clock, increment apic_nsec_since_boot */
	if (vector == apic_clkvect) {
		if (!apic_oneshot) {
			/* NOTE: this is not MT aware */
			/*
			 * The stamp is bumped both before and after the
			 * time update.  NOTE(review): presumably lets a
			 * reader detect an update in progress -- confirm
			 * against the gethrtime implementation.
			 */
			apic_hrtime_stamp++;
			apic_nsec_since_boot += apic_nsec_per_intr;
			apic_hrtime_stamp++;
			last_count_read = apic_hertz_count;
			apic_redistribute_compute();
		}

		/* We will avoid all the book keeping overhead for clock */
		nipl = apic_ipls[vector];

		/* hand the irq (not the vector) back to the caller */
		*vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT];
		if (apic_mode == LOCAL_APIC) {
#if defined(__amd64)
			setcr8((ulong_t)(apic_ipltopri[nipl] >>
			    APIC_IPL_SHIFT));
#else
			if (apic_have_32bit_cr8)
				setcr8((ulong_t)(apic_ipltopri[nipl] >>
				    APIC_IPL_SHIFT));
			else
				LOCAL_APIC_WRITE_REG(APIC_TASK_REG,
				    (uint32_t)apic_ipltopri[nipl]);
#endif
			/* clock path always issues the EOI here */
			LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
		} else {
			X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[nipl]);
			X2APIC_WRITE(APIC_EOI_REG, 0);
		}

		return (nipl);
	}

	cpu_infop = &apic_cpus[psm_get_cpu_id()];

	/* spurious vector: count it and return without touching priority */
	if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) {
		cpu_infop->aci_spur_cnt++;
		return (APIC_INT_SPURIOUS);
	}

	/* Check if the vector we got is really what we need */
	if (apic_revector_pending) {
		/*
		 * Disable interrupts for the duration of
		 * the vector translation to prevent a self-race for
		 * the apic_revector_lock.  This cannot be done
		 * in apic_xlate_vector because it is recursive and
		 * we want the vector translation to be atomic with
		 * respect to other (higher-priority) interrupts.
		 */
		iflag = intr_clear();
		vector = apic_xlate_vector(vector + APIC_BASE_VECT) -
		    APIC_BASE_VECT;
		intr_restore(iflag);
	}

	nipl = apic_ipls[vector];
	*vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT];

	/* raise the task priority to the level of this interrupt */
	if (apic_mode == LOCAL_APIC) {
#if defined(__amd64)
		setcr8((ulong_t)(apic_ipltopri[nipl] >> APIC_IPL_SHIFT));
#else
		if (apic_have_32bit_cr8)
			setcr8((ulong_t)(apic_ipltopri[nipl] >>
			    APIC_IPL_SHIFT));
		else
			LOCAL_APIC_WRITE_REG(APIC_TASK_REG,
			    (uint32_t)apic_ipltopri[nipl]);
#endif
	} else {
		X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[nipl]);
	}

	/* per-CPU bookkeeping: which irq runs at nipl, and that nipl is busy */
	cpu_infop->aci_current[nipl] = (uchar_t)irq;
	cpu_infop->aci_curipl = (uchar_t)nipl;
	cpu_infop->aci_ISR_in_progress |= 1 << nipl;

	/*
	 * apic_level_intr could have been assimilated into the irq struct.
	 * but, having it as a character array is more efficient in terms of
	 * cache usage. So, we leave it as is.
	 */
	/* level-triggered irqs get their EOI later, in APIC_INTR_EXIT() */
	if (!apic_level_intr[irq]) {
		if (apic_mode == LOCAL_APIC) {
			LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
		} else {
			X2APIC_WRITE(APIC_EOI_REG, 0);
		}
	}

#ifdef	DEBUG
	APIC_DEBUG_BUF_PUT(vector);
	APIC_DEBUG_BUF_PUT(irq);
	APIC_DEBUG_BUF_PUT(nipl);
	APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
	if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
		drv_usecwait(apic_stretch_interrupts);

	if (apic_break_on_cpu == psm_get_cpu_id())
		apic_break();
#endif /* DEBUG */
	return (nipl);
}
    668 
    669 /*
    670  * This macro is a common code used by MMIO local apic and X2APIC
    671  * local apic.
    672  */
/*
 * Expands in a scope that provides `cpu_infop' (apic_cpus_info_t *),
 * `irq' and `prev_ipl'.  Sends the EOI for level-triggered irqs, then
 * restores the per-CPU current IPL and clears the in-progress bits above
 * prev_ipl.  Wrapped in do/while (0) so that "APIC_INTR_EXIT();" is a
 * single statement and stays safe inside an unbraced if/else.
 */
#define	APIC_INTR_EXIT() \
do { \
	cpu_infop = &apic_cpus[psm_get_cpu_id()]; \
	if (apic_level_intr[irq]) \
		apic_reg_ops->apic_send_eoi(irq); \
	cpu_infop->aci_curipl = (uchar_t)prev_ipl; \
	/* ISR above current pri could not be in progress */ \
	cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1; \
} while (0)
    682 
    683 /*
    684  * Any changes made to this function must also change X2APIC
    685  * version of intr_exit.
    686  */
/*
 * MMIO (xAPIC) interrupt exit: drop the task priority back to prev_ipl,
 * then run the shared exit bookkeeping for irq.
 *
 * The locals/parameters `cpu_infop', `irq' and `prev_ipl' are referenced by
 * name inside APIC_INTR_EXIT(), so they must keep these names.
 */
void
apic_intr_exit(int prev_ipl, int irq)
{
	apic_cpus_info_t *cpu_infop;

#if defined(__amd64)
	/* amd64: priority goes through CR8 using the precomputed table */
	setcr8((ulong_t)apic_cr8pri[prev_ipl]);
#else
	/* 32-bit: CR8 when available, otherwise the task register directly */
	if (apic_have_32bit_cr8)
		setcr8((ulong_t)(apic_ipltopri[prev_ipl] >> APIC_IPL_SHIFT));
	else
		apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl];
#endif

	APIC_INTR_EXIT();
}
    703 
    704 /*
    705  * Same as apic_intr_exit() except it uses MSR rather than MMIO
    706  * to access local apic registers.
    707  */
/*
 * x2APIC interrupt exit: write the task priority for prev_ipl with
 * X2APIC_WRITE(), then run the shared exit bookkeeping for irq.
 * `cpu_infop', `irq' and `prev_ipl' are used by name in APIC_INTR_EXIT().
 */
void
x2apic_intr_exit(int prev_ipl, int irq)
{
	apic_cpus_info_t *cpu_infop;

	X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[prev_ipl]);
	APIC_INTR_EXIT();
}
    716 
    717 intr_exit_fn_t
    718 psm_intr_exit_fn(void)
    719 {
    720 	if (apic_mode == LOCAL_X2APIC)
    721 		return (x2apic_intr_exit);
    722 
    723 	return (apic_intr_exit);
    724 }
    725 
    726 /*
    727  * Mask all interrupts below or equal to the given IPL.
    728  * Any changes made to this function must also change X2APIC
    729  * version of setspl.
    730  */
/*
 * MMIO (xAPIC) setspl: program the task priority that corresponds to ipl
 * and clear the in-progress bits for every IPL above it on this CPU.
 */
static void
apic_setspl(int ipl)
{
#if defined(__amd64)
	/* amd64: priority goes through CR8 using the precomputed table */
	setcr8((ulong_t)apic_cr8pri[ipl]);
#else
	/* 32-bit: CR8 when available, otherwise the task register directly */
	if (apic_have_32bit_cr8)
		setcr8((ulong_t)(apic_ipltopri[ipl] >> APIC_IPL_SHIFT));
	else
		apicadr[APIC_TASK_REG] = apic_ipltopri[ipl];
#endif

	/* interrupts at ipl above this cannot be in progress */
	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
	/*
	 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts
	 * have enough time to come in before the priority is raised again
	 * during the idle() loop.
	 */
	/* the value read is discarded; only the access itself matters here */
	if (apic_setspl_delay)
		(void) apic_reg_ops->apic_get_pri();
}
    753 
    754 /*
    755  * X2APIC version of setspl.
    756  * Mask all interrupts below or equal to the given IPL
    757  */
    758 static void
    759 x2apic_setspl(int ipl)
    760 {
    761 	X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[ipl]);
    762 
    763 	/* interrupts at ipl above this cannot be in progress */
    764 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
    765 }
    766 
/*
 * PSM addspl entry: forwards all four arguments unchanged to
 * apic_addspl_common() and returns its result.
 */
/*ARGSUSED*/
static int
apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
	int rv = apic_addspl_common(irqno, ipl, min_ipl, max_ipl);

	return (rv);
}
    773 
/*
 * PSM delspl entry: forwards all four arguments unchanged to
 * apic_delspl_common() and returns its result.
 */
static int
apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
	int rv = apic_delspl_common(irqno, ipl, min_ipl, max_ipl);

	return (rv);
}
    779 
/*
 * PSM post_cpu_start entry, run on a CPU that has just been started.
 *
 * Switches the CPU into x2APIC mode when needed, programs its local APIC
 * via apic_init_intr(), enables the cache bits in CR0, marks the CPU
 * online in apic_cpus[] and initialises the timer divide register.
 * Always returns PSM_SUCCESS.
 */
static int
apic_post_cpu_start(void)
{
	int cpun;
	/* incremented below; not read anywhere else in this function */
	static int cpus_started = 1;

	/* We know this CPU + BSP  started successfully. */
	cpus_started++;

	/*
	 * On BSP we would have enabled X2APIC, if supported by processor,
	 * in acpi_probe(), but on AP we do it here.
	 *
	 * We enable X2APIC mode only if BSP is running in X2APIC & the
	 * local APIC mode of the current CPU is MMIO (xAPIC).
	 */
	if (apic_mode == LOCAL_X2APIC && apic_detect_x2apic() &&
	    apic_local_mode() == LOCAL_APIC) {
		apic_enable_x2apic();
	}

	/*
	 * Switch back to x2apic IPI sending method for performance when target
	 * CPU has entered x2apic mode.
	 */
	if (apic_mode == LOCAL_X2APIC) {
		apic_switch_ipi_callback(B_FALSE);
	}

	/* set the priority level to LOCK_LEVEL before programming the LVTs */
	splx(ipltospl(LOCK_LEVEL));
	apic_init_intr();

	/*
	 * since some systems don't enable the internal cache on the non-boot
	 * cpus, so we have to enable them here
	 */
	setcr0(getcr0() & ~(CR0_CD | CR0_NW));

	/* DEBUG builds run this in every mode; others only in MMIO mode */
#ifdef	DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC)
		APIC_AV_PENDING_SET();
#endif	/* DEBUG */

	/*
	 * We may be booting, or resuming from suspend; aci_status will
	 * be APIC_CPU_INTR_ENABLE if coming from suspend, so we add the
	 * APIC_CPU_ONLINE flag here rather than setting aci_status completely.
	 */
	cpun = psm_get_cpu_id();
	apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;

	apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
	return (PSM_SUCCESS);
}
    836 
    837 /*
    838  * type == -1 indicates it is an internal request. Do not change
    839  * resv_vector for these requests
    840  */
    841 static int
    842 apic_get_ipivect(int ipl, int type)
    843 {
    844 	uchar_t vector;
    845 	int irq;
    846 
    847 	if ((irq = apic_allocate_irq(APIC_VECTOR(ipl))) != -1) {
    848 		if (vector = apic_allocate_vector(ipl, irq, 1)) {
    849 			apic_irq_table[irq]->airq_mps_intr_index =
    850 			    RESERVE_INDEX;
    851 			apic_irq_table[irq]->airq_vector = vector;
    852 			if (type != -1) {
    853 				apic_resv_vector[ipl] = vector;
    854 			}
    855 			return (irq);
    856 		}
    857 	}
    858 	apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
    859 	return (-1);	/* shouldn't happen */
    860 }
    861 
    862 static int
    863 apic_getclkirq(int ipl)
    864 {
    865 	int	irq;
    866 
    867 	if ((irq = apic_get_ipivect(ipl, -1)) == -1)
    868 		return (-1);
    869 	/*
    870 	 * Note the vector in apic_clkvect for per clock handling.
    871 	 */
    872 	apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT;
    873 	APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n",
    874 	    apic_clkvect));
    875 	return (irq);
    876 }
    877 
    878 /*
    879  * Try and disable all interrupts. We just assign interrupts to other
    880  * processors based on policy. If any were bound by user request, we
    881  * let them continue and return failure. We do not bother to check
    882  * for cache affinity while rebinding.
    883  */
    884 
    885 static int
    886 apic_disable_intr(processorid_t cpun)
    887 {
    888 	int bind_cpu = 0, i, hardbound = 0;
    889 	apic_irq_t *irq_ptr;
    890 	ulong_t iflag;
    891 
    892 	iflag = intr_clear();
    893 	lock_set(&apic_ioapic_lock);
    894 
    895 	for (i = 0; i <= APIC_MAX_VECTOR; i++) {
    896 		if (apic_reprogram_info[i].done == B_FALSE) {
    897 			if (apic_reprogram_info[i].bindcpu == cpun) {
    898 				/*
    899 				 * CPU is busy -- it's the target of
    900 				 * a pending reprogramming attempt
    901 				 */
    902 				lock_clear(&apic_ioapic_lock);
    903 				intr_restore(iflag);
    904 				return (PSM_FAILURE);
    905 			}
    906 		}
    907 	}
    908 
    909 	apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
    910 
    911 	apic_cpus[cpun].aci_curipl = 0;
    912 
    913 	i = apic_min_device_irq;
    914 	for (; i <= apic_max_device_irq; i++) {
    915 		/*
    916 		 * If there are bound interrupts on this cpu, then
    917 		 * rebind them to other processors.
    918 		 */
    919 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
    920 			ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) ||
    921 			    (irq_ptr->airq_temp_cpu == IRQ_UNINIT) ||
    922 			    (apic_cpu_in_range(irq_ptr->airq_temp_cpu)));
    923 
    924 			if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) {
    925 				hardbound = 1;
    926 				continue;
    927 			}
    928 
    929 			if (irq_ptr->airq_temp_cpu == cpun) {
    930 				do {
    931 					bind_cpu =
    932 					    apic_find_cpu(APIC_CPU_INTR_ENABLE);
    933 				} while (apic_rebind_all(irq_ptr, bind_cpu));
    934 			}
    935 		}
    936 	}
    937 
    938 	lock_clear(&apic_ioapic_lock);
    939 	intr_restore(iflag);
    940 
    941 	if (hardbound) {
    942 		cmn_err(CE_WARN, "Could not disable interrupts on %d"
    943 		    "due to user bound interrupts", cpun);
    944 		return (PSM_FAILURE);
    945 	}
    946 	else
    947 		return (PSM_SUCCESS);
    948 }
    949 
    950 /*
    951  * Bind interrupts to the CPU's local APIC.
    952  * Interrupts should not be bound to a CPU's local APIC until the CPU
    953  * is ready to receive interrupts.
    954  */
    955 static void
    956 apic_enable_intr(processorid_t cpun)
    957 {
    958 	int	i;
    959 	apic_irq_t *irq_ptr;
    960 	ulong_t iflag;
    961 
    962 	iflag = intr_clear();
    963 	lock_set(&apic_ioapic_lock);
    964 
    965 	apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
    966 
    967 	i = apic_min_device_irq;
    968 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
    969 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
    970 			if ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) == cpun) {
    971 				(void) apic_rebind_all(irq_ptr,
    972 				    irq_ptr->airq_cpu);
    973 			}
    974 		}
    975 	}
    976 
    977 	if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND)
    978 		apic_cpus[cpun].aci_status &= ~APIC_CPU_SUSPEND;
    979 
    980 	lock_clear(&apic_ioapic_lock);
    981 	intr_restore(iflag);
    982 }
    983 
    984 /*
    985  * If this module needs a periodic handler for the interrupt distribution, it
    986  * can be added here. The argument to the periodic handler is not currently
    987  * used, but is reserved for future.
    988  */
    989 static void
    990 apic_post_cyclic_setup(void *arg)
    991 {
    992 _NOTE(ARGUNUSED(arg))
    993 	/* cpu_lock is held */
    994 	/* set up a periodic handler for intr redistribution */
    995 
    996 	/*
    997 	 * In peridoc mode intr redistribution processing is done in
    998 	 * apic_intr_enter during clk intr processing
    999 	 */
   1000 	if (!apic_oneshot)
   1001 		return;
   1002 	/*
   1003 	 * Register a periodical handler for the redistribution processing.
   1004 	 * On X86, CY_LOW_LEVEL is mapped to the level 2 interrupt, so
   1005 	 * DDI_IPL_2 should be passed to ddi_periodic_add() here.
   1006 	 */
   1007 	apic_periodic_id = ddi_periodic_add(
   1008 	    (void (*)(void *))apic_redistribute_compute, NULL,
   1009 	    apic_redistribute_sample_interval, DDI_IPL_2);
   1010 }
   1011 
   1012 static void
   1013 apic_redistribute_compute(void)
   1014 {
   1015 	int	i, j, max_busy;
   1016 
   1017 	if (apic_enable_dynamic_migration) {
   1018 		if (++apic_nticks == apic_sample_factor_redistribution) {
   1019 			/*
   1020 			 * Time to call apic_intr_redistribute().
   1021 			 * reset apic_nticks. This will cause max_busy
   1022 			 * to be calculated below and if it is more than
   1023 			 * apic_int_busy, we will do the whole thing
   1024 			 */
   1025 			apic_nticks = 0;
   1026 		}
   1027 		max_busy = 0;
   1028 		for (i = 0; i < apic_nproc; i++) {
   1029 			if (!apic_cpu_in_range(i))
   1030 				continue;
   1031 
   1032 			/*
   1033 			 * Check if curipl is non zero & if ISR is in
   1034 			 * progress
   1035 			 */
   1036 			if (((j = apic_cpus[i].aci_curipl) != 0) &&
   1037 			    (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
   1038 
   1039 				int	irq;
   1040 				apic_cpus[i].aci_busy++;
   1041 				irq = apic_cpus[i].aci_current[j];
   1042 				apic_irq_table[irq]->airq_busy++;
   1043 			}
   1044 
   1045 			if (!apic_nticks &&
   1046 			    (apic_cpus[i].aci_busy > max_busy))
   1047 				max_busy = apic_cpus[i].aci_busy;
   1048 		}
   1049 		if (!apic_nticks) {
   1050 			if (max_busy > apic_int_busy_mark) {
   1051 			/*
   1052 			 * We could make the following check be
   1053 			 * skipped > 1 in which case, we get a
   1054 			 * redistribution at half the busy mark (due to
   1055 			 * double interval). Need to be able to collect
   1056 			 * more empirical data to decide if that is a
   1057 			 * good strategy. Punt for now.
   1058 			 */
   1059 				if (apic_skipped_redistribute) {
   1060 					apic_cleanup_busy();
   1061 					apic_skipped_redistribute = 0;
   1062 				} else {
   1063 					apic_intr_redistribute();
   1064 				}
   1065 			} else
   1066 				apic_skipped_redistribute++;
   1067 		}
   1068 	}
   1069 }
   1070 
   1071 
   1072 /*
   1073  * The following functions are in the platform specific file so that they
   1074  * can be different functions depending on whether we are running on
   1075  * bare metal or a hypervisor.
   1076  */
   1077 
   1078 /*
   1079  * Check to make sure there are enough irq slots
   1080  */
   1081 int
   1082 apic_check_free_irqs(int count)
   1083 {
   1084 	int i, avail;
   1085 
   1086 	avail = 0;
   1087 	for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) {
   1088 		if ((apic_irq_table[i] == NULL) ||
   1089 		    apic_irq_table[i]->airq_mps_intr_index == FREE_INDEX) {
   1090 			if (++avail >= count)
   1091 				return (PSM_SUCCESS);
   1092 		}
   1093 	}
   1094 	return (PSM_FAILURE);
   1095 }
   1096 
   1097 /*
   1098  * This function allocates "count" MSI vector(s) for the given "dip/pri/type"
   1099  */
   1100 int
   1101 apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri,
   1102     int behavior)
   1103 {
   1104 	int	rcount, i;
   1105 	uchar_t	start, irqno;
   1106 	uint32_t cpu;
   1107 	major_t	major;
   1108 	apic_irq_t	*irqptr;
   1109 
   1110 	DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: dip=0x%p "
   1111 	    "inum=0x%x  pri=0x%x count=0x%x behavior=%d\n",
   1112 	    (void *)dip, inum, pri, count, behavior));
   1113 
   1114 	if (count > 1) {
   1115 		if (behavior == DDI_INTR_ALLOC_STRICT &&
   1116 		    apic_multi_msi_enable == 0)
   1117 			return (0);
   1118 		if (apic_multi_msi_enable == 0)
   1119 			count = 1;
   1120 	}
   1121 
   1122 	if ((rcount = apic_navail_vector(dip, pri)) > count)
   1123 		rcount = count;
   1124 	else if (rcount == 0 || (rcount < count &&
   1125 	    behavior == DDI_INTR_ALLOC_STRICT))
   1126 		return (0);
   1127 
   1128 	/* if not ISP2, then round it down */
   1129 	if (!ISP2(rcount))
   1130 		rcount = 1 << (highbit(rcount) - 1);
   1131 
   1132 	mutex_enter(&airq_mutex);
   1133 
   1134 	for (start = 0; rcount > 0; rcount >>= 1) {
   1135 		if ((start = apic_find_multi_vectors(pri, rcount)) != 0 ||
   1136 		    behavior == DDI_INTR_ALLOC_STRICT)
   1137 			break;
   1138 	}
   1139 
   1140 	if (start == 0) {
   1141 		/* no vector available */
   1142 		mutex_exit(&airq_mutex);
   1143 		return (0);
   1144 	}
   1145 
   1146 	if (apic_check_free_irqs(rcount) == PSM_FAILURE) {
   1147 		/* not enough free irq slots available */
   1148 		mutex_exit(&airq_mutex);
   1149 		return (0);
   1150 	}
   1151 
   1152 	major = (dip != NULL) ? ddi_driver_major(dip) : 0;
   1153 	for (i = 0; i < rcount; i++) {
   1154 		if ((irqno = apic_allocate_irq(apic_first_avail_irq)) ==
   1155 		    (uchar_t)-1) {
   1156 			/*
   1157 			 * shouldn't happen because of the
   1158 			 * apic_check_free_irqs() check earlier
   1159 			 */
   1160 			mutex_exit(&airq_mutex);
   1161 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
   1162 			    "apic_allocate_irq failed\n"));
   1163 			return (i);
   1164 		}
   1165 		apic_max_device_irq = max(irqno, apic_max_device_irq);
   1166 		apic_min_device_irq = min(irqno, apic_min_device_irq);
   1167 		irqptr = apic_irq_table[irqno];
   1168 #ifdef	DEBUG
   1169 		if (apic_vector_to_irq[start + i] != APIC_RESV_IRQ)
   1170 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
   1171 			    "apic_vector_to_irq is not APIC_RESV_IRQ\n"));
   1172 #endif
   1173 		apic_vector_to_irq[start + i] = (uchar_t)irqno;
   1174 
   1175 		irqptr->airq_vector = (uchar_t)(start + i);
   1176 		irqptr->airq_ioapicindex = (uchar_t)inum;	/* start */
   1177 		irqptr->airq_intin_no = (uchar_t)rcount;
   1178 		irqptr->airq_ipl = pri;
   1179 		irqptr->airq_vector = start + i;
   1180 		irqptr->airq_origirq = (uchar_t)(inum + i);
   1181 		irqptr->airq_share_id = 0;
   1182 		irqptr->airq_mps_intr_index = MSI_INDEX;
   1183 		irqptr->airq_dip = dip;
   1184 		irqptr->airq_major = major;
   1185 		if (i == 0) /* they all bound to the same cpu */
   1186 			cpu = irqptr->airq_cpu = apic_bind_intr(dip, irqno,
   1187 			    0xff, 0xff);
   1188 		else
   1189 			irqptr->airq_cpu = cpu;
   1190 		DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: irq=0x%x "
   1191 		    "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno,
   1192 		    (void *)irqptr->airq_dip, irqptr->airq_vector,
   1193 		    irqptr->airq_origirq, pri));
   1194 	}
   1195 	mutex_exit(&airq_mutex);
   1196 	return (rcount);
   1197 }
   1198 
   1199 /*
   1200  * This function allocates "count" MSI-X vector(s) for the given "dip/pri/type"
   1201  */
   1202 int
   1203 apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri,
   1204     int behavior)
   1205 {
   1206 	int	rcount, i;
   1207 	major_t	major;
   1208 
   1209 	mutex_enter(&airq_mutex);
   1210 
   1211 	if ((rcount = apic_navail_vector(dip, pri)) > count)
   1212 		rcount = count;
   1213 	else if (rcount == 0 || (rcount < count &&
   1214 	    behavior == DDI_INTR_ALLOC_STRICT)) {
   1215 		rcount = 0;
   1216 		goto out;
   1217 	}
   1218 
   1219 	if (apic_check_free_irqs(rcount) == PSM_FAILURE) {
   1220 		/* not enough free irq slots available */
   1221 		rcount = 0;
   1222 		goto out;
   1223 	}
   1224 
   1225 	major = (dip != NULL) ? ddi_driver_major(dip) : 0;
   1226 	for (i = 0; i < rcount; i++) {
   1227 		uchar_t	vector, irqno;
   1228 		apic_irq_t	*irqptr;
   1229 
   1230 		if ((irqno = apic_allocate_irq(apic_first_avail_irq)) ==
   1231 		    (uchar_t)-1) {
   1232 			/*
   1233 			 * shouldn't happen because of the
   1234 			 * apic_check_free_irqs() check earlier
   1235 			 */
   1236 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msix_vectors: "
   1237 			    "apic_allocate_irq failed\n"));
   1238 			rcount = i;
   1239 			goto out;
   1240 		}
   1241 		if ((vector = apic_allocate_vector(pri, irqno, 1)) == 0) {
   1242 			/*
   1243 			 * shouldn't happen because of the
   1244 			 * apic_navail_vector() call earlier
   1245 			 */
   1246 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msix_vectors: "
   1247 			    "apic_allocate_vector failed\n"));
   1248 			rcount = i;
   1249 			goto out;
   1250 		}
   1251 		apic_max_device_irq = max(irqno, apic_max_device_irq);
   1252 		apic_min_device_irq = min(irqno, apic_min_device_irq);
   1253 		irqptr = apic_irq_table[irqno];
   1254 		irqptr->airq_vector = (uchar_t)vector;
   1255 		irqptr->airq_ipl = pri;
   1256 		irqptr->airq_origirq = (uchar_t)(inum + i);
   1257 		irqptr->airq_share_id = 0;
   1258 		irqptr->airq_mps_intr_index = MSIX_INDEX;
   1259 		irqptr->airq_dip = dip;
   1260 		irqptr->airq_major = major;
   1261 		irqptr->airq_cpu = apic_bind_intr(dip, irqno, 0xff, 0xff);
   1262 	}
   1263 out:
   1264 	mutex_exit(&airq_mutex);
   1265 	return (rcount);
   1266 }
   1267 
   1268 /*
   1269  * Allocate a free vector for irq at ipl. Takes care of merging of multiple
   1270  * IPLs into a single APIC level as well as stretching some IPLs onto multiple
   1271  * levels. APIC_HI_PRI_VECTS interrupts are reserved for high priority
   1272  * requests and allocated only when pri is set.
   1273  */
   1274 uchar_t
   1275 apic_allocate_vector(int ipl, int irq, int pri)
   1276 {
   1277 	int	lowest, highest, i;
   1278 
   1279 	highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK;
   1280 	lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL;
   1281 
   1282 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
   1283 		lowest -= APIC_VECTOR_PER_IPL;
   1284 
   1285 #ifdef	DEBUG
   1286 	if (apic_restrict_vector)	/* for testing shared interrupt logic */
   1287 		highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS;
   1288 #endif /* DEBUG */
   1289 	if (pri == 0)
   1290 		highest -= APIC_HI_PRI_VECTS;
   1291 
   1292 	for (i = lowest; i <= highest; i++) {
   1293 		if (APIC_CHECK_RESERVE_VECTORS(i))
   1294 			continue;
   1295 		if (apic_vector_to_irq[i] == APIC_RESV_IRQ) {
   1296 			apic_vector_to_irq[i] = (uchar_t)irq;
   1297 			return (i);
   1298 		}
   1299 	}
   1300 
   1301 	return (0);
   1302 }
   1303 
   1304 /* Mark vector as not being used by any irq */
   1305 void
   1306 apic_free_vector(uchar_t vector)
   1307 {
   1308 	apic_vector_to_irq[vector] = APIC_RESV_IRQ;
   1309 }
   1310 
   1311 /*
   1312  * Call rebind to do the actual programming.
   1313  * Must be called with interrupts disabled and apic_ioapic_lock held
   1314  * 'p' is polymorphic -- if this function is called to process a deferred
   1315  * reprogramming, p is of type 'struct ioapic_reprogram_data *', from which
   1316  * the irq pointer is retrieved.  If not doing deferred reprogramming,
   1317  * p is of the type 'apic_irq_t *'.
   1318  *
   1319  * apic_ioapic_lock must be held across this call, as it protects apic_rebind
   1320  * and it protects apic_get_next_bind_cpu() from a race in which a CPU can be
   1321  * taken offline after a cpu is selected, but before apic_rebind is called to
   1322  * bind interrupts to it.
   1323  */
   1324 int
   1325 apic_setup_io_intr(void *p, int irq, boolean_t deferred)
   1326 {
   1327 	apic_irq_t *irqptr;
   1328 	struct ioapic_reprogram_data *drep = NULL;
   1329 	int rv;
   1330 
   1331 	if (deferred) {
   1332 		drep = (struct ioapic_reprogram_data *)p;
   1333 		ASSERT(drep != NULL);
   1334 		irqptr = drep->irqp;
   1335 	} else
   1336 		irqptr = (apic_irq_t *)p;
   1337 
   1338 	ASSERT(irqptr != NULL);
   1339 
   1340 	rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, drep);
   1341 	if (rv) {
   1342 		/*
   1343 		 * CPU is not up or interrupts are disabled. Fall back to
   1344 		 * the first available CPU
   1345 		 */
   1346 		rv = apic_rebind(irqptr, apic_find_cpu(APIC_CPU_INTR_ENABLE),
   1347 		    drep);
   1348 	}
   1349 
   1350 	return (rv);
   1351 }
   1352 
   1353 
   1354 uchar_t
   1355 apic_modify_vector(uchar_t vector, int irq)
   1356 {
   1357 	apic_vector_to_irq[vector] = (uchar_t)irq;
   1358 	return (vector);
   1359 }
   1360 
   1361 char *
   1362 apic_get_apic_type(void)
   1363 {
   1364 	return (apic_psm_info.p_mach_idstring);
   1365 }
   1366 
   1367 void
   1368 x2apic_update_psm(void)
   1369 {
   1370 	struct psm_ops *pops = &apic_ops;
   1371 
   1372 	ASSERT(pops != NULL);
   1373 
   1374 	pops->psm_intr_exit = x2apic_intr_exit;
   1375 	pops->psm_setspl = x2apic_setspl;
   1376 
   1377 	pops->psm_send_ipi =  x2apic_send_ipi;
   1378 	send_dirintf = pops->psm_send_ipi;
   1379 
   1380 	apic_mode = LOCAL_X2APIC;
   1381 	apic_change_ops();
   1382 }
   1383