1 5084 johnlev /* 2 5084 johnlev * CDDL HEADER START 3 5084 johnlev * 4 5084 johnlev * The contents of this file are subject to the terms of the 5 5084 johnlev * Common Development and Distribution License (the "License"). 6 5084 johnlev * You may not use this file except in compliance with the License. 7 5084 johnlev * 8 5084 johnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 5084 johnlev * or http://www.opensolaris.org/os/licensing. 10 5084 johnlev * See the License for the specific language governing permissions 11 5084 johnlev * and limitations under the License. 12 5084 johnlev * 13 5084 johnlev * When distributing Covered Code, include this CDDL HEADER in each 14 5084 johnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 5084 johnlev * If applicable, add the following below this CDDL HEADER, with the 16 5084 johnlev * fields enclosed by brackets "[]" replaced with your own identifying 17 5084 johnlev * information: Portions Copyright [yyyy] [name of copyright owner] 18 5084 johnlev * 19 5084 johnlev * CDDL HEADER END 20 5084 johnlev */ 21 5084 johnlev 22 5084 johnlev /* 23 8925 Evan * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 5084 johnlev * Use is subject to license terms. 25 5084 johnlev */ 26 5084 johnlev 27 7767 John #define PSMI_1_6 28 5084 johnlev 29 5084 johnlev #include <sys/mutex.h> 30 5084 johnlev #include <sys/types.h> 31 5084 johnlev #include <sys/time.h> 32 5084 johnlev #include <sys/clock.h> 33 5084 johnlev #include <sys/machlock.h> 34 5084 johnlev #include <sys/smp_impldefs.h> 35 5084 johnlev #include <sys/uadmin.h> 36 5084 johnlev #include <sys/promif.h> 37 5084 johnlev #include <sys/psm.h> 38 5084 johnlev #include <sys/psm_common.h> 39 5084 johnlev #include <sys/atomic.h> 40 5084 johnlev #include <sys/apic.h> 41 5084 johnlev #include <sys/archsystm.h> 42 5084 johnlev #include <sys/mach_intr.h> 43 5084 johnlev #include <sys/hypervisor.h> 44 5084 johnlev #include <sys/evtchn_impl.h> 45 5084 johnlev #include <sys/modctl.h> 46 5084 johnlev #include <sys/trap.h> 47 5084 johnlev #include <sys/panic.h> 48 10175 Stuart #include <sys/sysmacros.h> 49 10175 Stuart #include <sys/pci_intr_lib.h> 50 10175 Stuart #include <vm/hat_i86.h> 51 5084 johnlev 52 5084 johnlev #include <xen/public/vcpu.h> 53 5084 johnlev #include <xen/public/physdev.h> 54 5084 johnlev 55 5084 johnlev 56 5084 johnlev /* 57 5084 johnlev * Global Data 58 5084 johnlev */ 59 5084 johnlev 60 5084 johnlev int xen_psm_verbose = 0; 61 5084 johnlev 62 7282 mishra /* As of now we don't support x2apic in xVM */ 63 5084 johnlev volatile uint32_t *apicadr = NULL; /* dummy, so common code will link */ 64 5084 johnlev int apic_error = 0; 65 5084 johnlev int apic_verbose = 0; 66 5084 johnlev cpuset_t apic_cpumask; 67 5084 johnlev int apic_forceload = 0; 68 5084 johnlev uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = { 69 5084 johnlev 3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15 70 5084 johnlev }; 71 5084 johnlev uchar_t apic_ipltopri[MAXIPL + 1]; 72 5084 johnlev uchar_t apic_ipls[APIC_AVAIL_VECTOR]; 73 5084 johnlev uint_t apic_picinit_called; 74 5084 johnlev apic_cpus_info_t *apic_cpus; 75 5084 johnlev int xen_psm_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY; 76 5084 johnlev /* use to make sure only one cpu handles the nmi */ 77 5084 johnlev static lock_t xen_psm_nmi_lock; 78 5084 johnlev int xen_psm_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 79 5084 johnlev int xen_psm_panic_on_nmi = 0; 80 5084 johnlev int xen_psm_num_nmis = 0; 81 5084 johnlev 82 5084 johnlev cpuset_t xen_psm_cpus_online; /* online cpus */ 83 5084 johnlev int xen_psm_ncpus = 1; /* cpu count */ 84 5084 johnlev int xen_psm_next_bind_cpu; /* next cpu to bind an interrupt to */ 85 5084 johnlev 86 10323 Stuart int xen_support_msi = 0; 87 5084 johnlev 88 5084 johnlev static int xen_clock_irq = INVALID_IRQ; 89 5084 johnlev 90 5084 johnlev /* flag definitions for xen_psm_verbose */ 91 5084 johnlev #define XEN_PSM_VERBOSE_IRQ_FLAG 0x00000001 92 5084 johnlev #define XEN_PSM_VERBOSE_POWEROFF_FLAG 0x00000002 93 5084 johnlev #define XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000004 94 5084 johnlev 95 5084 johnlev #define XEN_PSM_VERBOSE_IRQ(fmt) \ 96 5084 johnlev if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG) \ 97 5084 johnlev cmn_err fmt; 98 5084 johnlev 99 5084 johnlev #define XEN_PSM_VERBOSE_POWEROFF(fmt) \ 100 5084 johnlev if (xen_psm_verbose & XEN_PSM_VERBOSE_POWEROFF_FLAG) \ 101 5084 johnlev prom_printf fmt; 102 5084 johnlev 103 5084 johnlev /* 104 5084 johnlev * Dummy apic array to point common routines at that want to do some apic 105 5084 johnlev * manipulation. Xen doesn't allow guest apic access so we point at these 106 5084 johnlev * memory locations to fake out those who want to do apic fiddling. 107 5084 johnlev */ 108 5084 johnlev uint32_t xen_psm_dummy_apic[APIC_IRR_REG + 1]; 109 5084 johnlev 110 5084 johnlev static struct psm_info xen_psm_info; 111 5084 johnlev static void xen_psm_setspl(int); 112 5084 johnlev 113 10175 Stuart int 114 10175 Stuart apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri, 115 10175 Stuart int behavior); 116 10175 Stuart int 117 10175 Stuart apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri, 118 10175 Stuart int behavior); 119 5084 johnlev 120 5084 johnlev /* 121 5084 johnlev * Local support routines 122 5084 johnlev */ 123 5084 johnlev 124 5084 johnlev /* 125 5084 johnlev * Select vcpu to bind xen virtual device interrupt to. 126 5084 johnlev */ 127 5084 johnlev /*ARGSUSED*/ 128 5084 johnlev int 129 5084 johnlev xen_psm_bind_intr(int irq) 130 5084 johnlev { 131 10175 Stuart int bind_cpu; 132 5084 johnlev apic_irq_t *irqptr; 133 5084 johnlev 134 10175 Stuart bind_cpu = IRQ_UNBOUND; 135 5084 johnlev if (xen_psm_intr_policy == INTR_LOWEST_PRIORITY) 136 10175 Stuart return (bind_cpu); 137 5084 johnlev if (irq <= APIC_MAX_VECTOR) 138 5084 johnlev irqptr = apic_irq_table[irq]; 139 5084 johnlev else 140 5084 johnlev irqptr = NULL; 141 10175 Stuart if (irqptr && (irqptr->airq_cpu != IRQ_UNBOUND)) 142 10175 Stuart bind_cpu = irqptr->airq_cpu & ~IRQ_USER_BOUND; 143 10175 Stuart if (bind_cpu != IRQ_UNBOUND) { 144 10175 Stuart if (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu)) 145 5084 johnlev bind_cpu = 0; 146 5084 johnlev goto done; 147 5084 johnlev } 148 5084 johnlev if (xen_psm_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 149 5084 johnlev do { 150 5084 johnlev bind_cpu = xen_psm_next_bind_cpu++; 151 5084 johnlev if (xen_psm_next_bind_cpu >= xen_psm_ncpus) 152 5084 johnlev xen_psm_next_bind_cpu = 0; 153 5084 johnlev } while (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu)); 154 5084 johnlev } else { 155 5084 johnlev bind_cpu = 0; 156 5084 johnlev } 157 5084 johnlev done: 158 5084 johnlev return (bind_cpu); 159 5084 johnlev } 160 5084 johnlev 161 5084 johnlev /* 162 5084 johnlev * Autoconfiguration Routines 163 5084 johnlev */ 164 5084 johnlev 165 5084 johnlev static int 166 5084 johnlev xen_psm_probe(void) 167 5084 johnlev { 168 5084 johnlev int ret = PSM_SUCCESS; 169 5084 johnlev 170 5084 johnlev if (DOMAIN_IS_INITDOMAIN(xen_info)) 171 5084 johnlev ret = apic_probe_common(xen_psm_info.p_mach_idstring); 172 5084 johnlev return (ret); 173 5084 johnlev } 174 5084 johnlev 175 5084 johnlev static void 176 5084 johnlev xen_psm_softinit(void) 177 5084 johnlev { 178 5084 johnlev /* LINTED logical expression always true: op "||" */ 179 5084 johnlev ASSERT((1 << EVTCHN_SHIFT) == NBBY * sizeof (ulong_t)); 180 5529 smaybe CPUSET_ATOMIC_ADD(xen_psm_cpus_online, 0); 181 5084 johnlev if (DOMAIN_IS_INITDOMAIN(xen_info)) { 182 5084 johnlev apic_init_common(); 183 5084 johnlev } 184 5084 johnlev } 185 5084 johnlev 186 5084 johnlev #define XEN_NSEC_PER_TICK 10 /* XXX - assume we have a 100 Mhz clock */ 187 5084 johnlev 188 5084 johnlev /*ARGSUSED*/ 189 5084 johnlev static int 190 5084 johnlev xen_psm_clkinit(int hertz) 191 5084 johnlev { 192 5084 johnlev extern enum tod_fault_type tod_fault(enum tod_fault_type, int); 193 5084 johnlev extern int dosynctodr; 194 5084 johnlev 195 5084 johnlev /* 196 5084 johnlev * domU cannot set the TOD hardware, fault the TOD clock now to 197 5084 johnlev * indicate that and turn off attempts to sync TOD hardware 198 5084 johnlev * with the hires timer. 199 5084 johnlev */ 200 5084 johnlev if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 201 5084 johnlev mutex_enter(&tod_lock); 202 5084 johnlev (void) tod_fault(TOD_RDONLY, 0); 203 5084 johnlev dosynctodr = 0; 204 5084 johnlev mutex_exit(&tod_lock); 205 5084 johnlev } 206 5084 johnlev /* 207 5084 johnlev * The hypervisor provides a timer based on the local APIC timer. 208 5084 johnlev * The interface supports requests of nanosecond resolution. 209 5084 johnlev * A common frequency of the apic clock is 100 Mhz which 210 5084 johnlev * gives a resolution of 10 nsec per tick. What we would really like 211 5084 johnlev * is a way to get the ns per tick value from xen. 212 5084 johnlev * XXPV - This is an assumption that needs checking and may change 213 5084 johnlev */ 214 5084 johnlev return (XEN_NSEC_PER_TICK); 215 5084 johnlev } 216 5084 johnlev 217 5084 johnlev static void 218 5084 johnlev xen_psm_hrtimeinit(void) 219 5084 johnlev { 220 5084 johnlev extern int gethrtime_hires; 221 5084 johnlev gethrtime_hires = 1; 222 5084 johnlev } 223 5084 johnlev 224 5084 johnlev /* xen_psm NMI handler */ 225 5084 johnlev /*ARGSUSED*/ 226 5084 johnlev static void 227 5084 johnlev xen_psm_nmi_intr(caddr_t arg, struct regs *rp) 228 5084 johnlev { 229 5084 johnlev xen_psm_num_nmis++; 230 5084 johnlev 231 5084 johnlev if (!lock_try(&xen_psm_nmi_lock)) 232 5084 johnlev return; 233 5084 johnlev 234 5084 johnlev if (xen_psm_kmdb_on_nmi && psm_debugger()) { 235 5084 johnlev debug_enter("NMI received: entering kmdb\n"); 236 5084 johnlev } else if (xen_psm_panic_on_nmi) { 237 5084 johnlev /* Keep panic from entering kmdb. */ 238 5084 johnlev nopanicdebug = 1; 239 5084 johnlev panic("NMI received\n"); 240 5084 johnlev } else { 241 5084 johnlev /* 242 5084 johnlev * prom_printf is the best shot we have of something which is 243 5084 johnlev * problem free from high level/NMI type of interrupts 244 5084 johnlev */ 245 5084 johnlev prom_printf("NMI received\n"); 246 5084 johnlev } 247 5084 johnlev 248 5084 johnlev lock_clear(&xen_psm_nmi_lock); 249 5084 johnlev } 250 5084 johnlev 251 5084 johnlev static void 252 5084 johnlev xen_psm_picinit() 253 5084 johnlev { 254 5084 johnlev int cpu, irqno; 255 5084 johnlev cpuset_t cpus; 256 5084 johnlev 257 5084 johnlev if (DOMAIN_IS_INITDOMAIN(xen_info)) { 258 5084 johnlev /* set a flag so we know we have run xen_psm_picinit() */ 259 5084 johnlev apic_picinit_called = 1; 260 5084 johnlev LOCK_INIT_CLEAR(&apic_ioapic_lock); 261 5084 johnlev 262 5084 johnlev /* XXPV - do we need to do this? */ 263 5084 johnlev picsetup(); /* initialise the 8259 */ 264 5084 johnlev 265 5084 johnlev /* enable apic mode if imcr present */ 266 5084 johnlev /* XXPV - do we need to do this either? */ 267 5084 johnlev if (apic_imcrp) { 268 5084 johnlev outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 269 5084 johnlev outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); 270 5084 johnlev } 271 5084 johnlev 272 5084 johnlev ioapic_init_intr(IOAPIC_NOMASK); 273 5084 johnlev /* 274 5084 johnlev * We never called xen_psm_addspl() when the SCI 275 5084 johnlev * interrupt was added because that happened before the 276 5084 johnlev * PSM module was loaded. Fix that up here by doing 277 5084 johnlev * any missed operations (e.g. bind to CPU) 278 5084 johnlev */ 279 5084 johnlev if ((irqno = apic_sci_vect) > 0) { 280 5084 johnlev if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 281 5084 johnlev CPUSET_ZERO(cpus); 282 5084 johnlev CPUSET_OR(cpus, xen_psm_cpus_online); 283 5084 johnlev } else { 284 5084 johnlev CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 285 5084 johnlev } 286 5084 johnlev ec_set_irq_affinity(irqno, cpus); 287 5529 smaybe apic_irq_table[irqno]->airq_temp_cpu = 288 5529 smaybe (uchar_t)(cpu & ~IRQ_USER_BOUND); 289 5084 johnlev ec_enable_irq(irqno); 290 5084 johnlev } 291 5084 johnlev } 292 5084 johnlev 293 5084 johnlev /* add nmi handler - least priority nmi handler */ 294 5084 johnlev LOCK_INIT_CLEAR(&xen_psm_nmi_lock); 295 5084 johnlev 296 5084 johnlev if (!psm_add_nmintr(0, (avfunc) xen_psm_nmi_intr, 297 5529 smaybe "xVM_psm NMI handler", (caddr_t)NULL)) 298 5529 smaybe cmn_err(CE_WARN, "xVM_psm: Unable to add nmi handler"); 299 5084 johnlev } 300 5084 johnlev 301 5084 johnlev 302 5084 johnlev /* 303 5084 johnlev * generates an interprocessor interrupt to another CPU 304 5084 johnlev */ 305 5084 johnlev static void 306 5084 johnlev xen_psm_send_ipi(int cpun, int ipl) 307 5084 johnlev { 308 5084 johnlev ulong_t flag = intr_clear(); 309 5084 johnlev 310 5084 johnlev ec_send_ipi(ipl, cpun); 311 5084 johnlev intr_restore(flag); 312 5084 johnlev } 313 5084 johnlev 314 5084 johnlev /*ARGSUSED*/ 315 5084 johnlev static int 316 5084 johnlev xen_psm_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 317 5084 johnlev { 318 5084 johnlev int cpu, ret; 319 5084 johnlev cpuset_t cpus; 320 5084 johnlev 321 5084 johnlev /* 322 5084 johnlev * We are called at splhi() so we can't call anything that might end 323 5084 johnlev * up trying to context switch. 324 5084 johnlev */ 325 5084 johnlev if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 326 5084 johnlev DOMAIN_IS_INITDOMAIN(xen_info)) { 327 5084 johnlev /* 328 5084 johnlev * Priority/affinity/enable for PIRQ's is set in ec_setup_pirq() 329 5084 johnlev */ 330 5084 johnlev ret = apic_addspl_common(irqno, ipl, min_ipl, max_ipl); 331 5084 johnlev } else { 332 5084 johnlev /* 333 5084 johnlev * Set priority/affinity/enable for non PIRQs 334 5084 johnlev */ 335 5084 johnlev ret = ec_set_irq_priority(irqno, ipl); 336 5084 johnlev ASSERT(ret == 0); 337 5084 johnlev if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 338 5084 johnlev CPUSET_ZERO(cpus); 339 5084 johnlev CPUSET_OR(cpus, xen_psm_cpus_online); 340 5084 johnlev } else { 341 5084 johnlev CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 342 5084 johnlev } 343 5084 johnlev ec_set_irq_affinity(irqno, cpus); 344 5084 johnlev ec_enable_irq(irqno); 345 5084 johnlev } 346 5084 johnlev return (ret); 347 5084 johnlev } 348 5084 johnlev 349 5084 johnlev /* 350 5084 johnlev * Acquire ownership of this irq on this cpu 351 5084 johnlev */ 352 5084 johnlev void 353 5084 johnlev xen_psm_acquire_irq(int irq) 354 5084 johnlev { 355 5084 johnlev ulong_t flags; 356 5084 johnlev int cpuid; 357 5084 johnlev 358 5084 johnlev /* 359 5084 johnlev * If the irq is currently being serviced by another cpu 360 5084 johnlev * we busy-wait for the other cpu to finish. Take any 361 5084 johnlev * pending interrupts before retrying. 362 5084 johnlev */ 363 5084 johnlev do { 364 5084 johnlev flags = intr_clear(); 365 5084 johnlev cpuid = ec_block_irq(irq); 366 5084 johnlev intr_restore(flags); 367 5084 johnlev } while (cpuid != CPU->cpu_id); 368 5084 johnlev } 369 5084 johnlev 370 5084 johnlev /*ARGSUSED*/ 371 5084 johnlev static int 372 5084 johnlev xen_psm_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 373 5084 johnlev { 374 5084 johnlev apic_irq_t *irqptr; 375 5084 johnlev int err = PSM_SUCCESS; 376 5084 johnlev 377 5084 johnlev if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 378 5084 johnlev DOMAIN_IS_INITDOMAIN(xen_info)) { 379 5084 johnlev irqptr = apic_irq_table[irqno]; 380 5084 johnlev /* 381 5084 johnlev * unbind if no more sharers of this irq/evtchn 382 5084 johnlev */ 383 5084 johnlev if (irqptr->airq_share == 1) { 384 5084 johnlev xen_psm_acquire_irq(irqno); 385 5084 johnlev ec_unbind_irq(irqno); 386 5084 johnlev } 387 5084 johnlev err = apic_delspl_common(irqno, ipl, min_ipl, max_ipl); 388 5084 johnlev /* 389 5084 johnlev * If still in use reset priority 390 5084 johnlev */ 391 5084 johnlev if (!err && irqptr->airq_share != 0) { 392 5084 johnlev err = ec_set_irq_priority(irqno, max_ipl); 393 5084 johnlev return (err); 394 5084 johnlev } 395 5084 johnlev } else { 396 5084 johnlev xen_psm_acquire_irq(irqno); 397 5084 johnlev ec_unbind_irq(irqno); 398 5084 johnlev } 399 5084 johnlev return (err); 400 5084 johnlev } 401 5084 johnlev 402 5084 johnlev static processorid_t 403 5084 johnlev xen_psm_get_next_processorid(processorid_t id) 404 5084 johnlev { 405 5084 johnlev if (id == -1) 406 5084 johnlev return (0); 407 5084 johnlev 408 5084 johnlev for (id++; id < NCPU; id++) { 409 5084 johnlev switch (-HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL)) { 410 5084 johnlev case 0: /* yeah, that one's there */ 411 5084 johnlev return (id); 412 5084 johnlev default: 413 5084 johnlev case X_EINVAL: /* out of range */ 414 5084 johnlev return (-1); 415 5084 johnlev case X_ENOENT: /* not present in the domain */ 416 5084 johnlev /* 417 5084 johnlev * It's not clear that we -need- to keep looking 418 5084 johnlev * at this point, if, e.g., we can guarantee 419 5084 johnlev * the hypervisor always keeps a contiguous range 420 5084 johnlev * of vcpus around this is equivalent to "out of range". 421 5084 johnlev * 422 5084 johnlev * But it would be sad to miss a vcpu we're 423 5084 johnlev * supposed to be using .. 424 5084 johnlev */ 425 5084 johnlev break; 426 5084 johnlev } 427 5084 johnlev } 428 5084 johnlev 429 5084 johnlev return (-1); 430 5084 johnlev } 431 5084 johnlev 432 5084 johnlev /* 433 5084 johnlev * XXPV - undo the start cpu op change; return to ignoring this value 434 5084 johnlev * - also tweak error handling in main startup loop 435 5084 johnlev */ 436 5084 johnlev /*ARGSUSED*/ 437 5084 johnlev static int 438 5084 johnlev xen_psm_cpu_start(processorid_t id, caddr_t arg) 439 5084 johnlev { 440 5084 johnlev int ret; 441 5084 johnlev 442 5084 johnlev ASSERT(id > 0); 443 5529 smaybe CPUSET_ATOMIC_ADD(xen_psm_cpus_online, id); 444 5084 johnlev ec_bind_cpu_ipis(id); 445 5084 johnlev (void) ec_bind_virq_to_irq(VIRQ_TIMER, id); 446 5084 johnlev if ((ret = xen_vcpu_up(id)) == 0) 447 5084 johnlev xen_psm_ncpus++; 448 5084 johnlev else 449 5084 johnlev ret = EINVAL; 450 5084 johnlev return (ret); 451 5084 johnlev } 452 5084 johnlev 453 5084 johnlev /* 454 5084 johnlev * Allocate an irq for inter cpu signaling 455 5084 johnlev */ 456 5084 johnlev /*ARGSUSED*/ 457 5084 johnlev static int 458 5084 johnlev xen_psm_get_ipivect(int ipl, int type) 459 5084 johnlev { 460 5084 johnlev return (ec_bind_ipi_to_irq(ipl, 0)); 461 5084 johnlev } 462 5084 johnlev 463 5084 johnlev /*ARGSUSED*/ 464 5084 johnlev static int 465 5084 johnlev xen_psm_get_clockirq(int ipl) 466 5084 johnlev { 467 5084 johnlev if (xen_clock_irq != INVALID_IRQ) 468 5084 johnlev return (xen_clock_irq); 469 5084 johnlev 470 5084 johnlev xen_clock_irq = ec_bind_virq_to_irq(VIRQ_TIMER, 0); 471 5084 johnlev return (xen_clock_irq); 472 5084 johnlev } 473 5084 johnlev 474 5084 johnlev /*ARGSUSED*/ 475 5084 johnlev static void 476 5084 johnlev xen_psm_shutdown(int cmd, int fcn) 477 5084 johnlev { 478 5084 johnlev XEN_PSM_VERBOSE_POWEROFF(("xen_psm_shutdown(%d,%d);\n", cmd, fcn)); 479 5084 johnlev 480 5084 johnlev switch (cmd) { 481 5084 johnlev case A_SHUTDOWN: 482 5084 johnlev switch (fcn) { 483 5084 johnlev case AD_BOOT: 484 5084 johnlev case AD_IBOOT: 485 5084 johnlev (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 486 5084 johnlev break; 487 5084 johnlev case AD_POWEROFF: 488 5084 johnlev /* fall through if domU or if poweroff fails */ 489 5084 johnlev if (DOMAIN_IS_INITDOMAIN(xen_info)) 490 5084 johnlev if (apic_enable_acpi) 491 5084 johnlev (void) acpi_poweroff(); 492 5084 johnlev /* FALLTHRU */ 493 5084 johnlev case AD_HALT: 494 5084 johnlev default: 495 5084 johnlev (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); 496 5084 johnlev break; 497 5084 johnlev } 498 5084 johnlev break; 499 5084 johnlev case A_REBOOT: 500 5084 johnlev (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 501 5084 johnlev break; 502 5084 johnlev default: 503 5084 johnlev return; 504 5084 johnlev } 505 5084 johnlev } 506 5084 johnlev 507 5084 johnlev 508 5084 johnlev static int 509 5084 johnlev xen_psm_translate_irq(dev_info_t *dip, int irqno) 510 5084 johnlev { 511 5084 johnlev if (dip == NULL) { 512 5084 johnlev XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d" 513 5084 johnlev " dip = NULL\n", irqno)); 514 5084 johnlev return (irqno); 515 5084 johnlev } 516 5084 johnlev return (irqno); 517 5084 johnlev } 518 5084 johnlev 519 5084 johnlev /* 520 5084 johnlev * xen_psm_intr_enter() acks the event that triggered the interrupt and 521 5084 johnlev * returns the new priority level, 522 5084 johnlev */ 523 5084 johnlev /*ARGSUSED*/ 524 5084 johnlev static int 525 5084 johnlev xen_psm_intr_enter(int ipl, int *vector) 526 5084 johnlev { 527 5084 johnlev int newipl; 528 5084 johnlev uint_t intno; 529 5084 johnlev cpu_t *cpu = CPU; 530 5084 johnlev 531 5084 johnlev intno = (*vector); 532 5084 johnlev 533 5084 johnlev ASSERT(intno < NR_IRQS); 534 5084 johnlev ASSERT(cpu->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0); 535 5084 johnlev 536 10175 Stuart if (!ec_is_edge_pirq(intno)) 537 10175 Stuart ec_clear_irq(intno); 538 5084 johnlev 539 5084 johnlev newipl = autovect[intno].avh_hi_pri; 540 5084 johnlev if (newipl == 0) { 541 5084 johnlev /* 542 5084 johnlev * (newipl == 0) means we have no service routines for this 543 5084 johnlev * vector. We will treat this as a spurious interrupt. 544 5084 johnlev * We have cleared the pending bit already, clear the event 545 5084 johnlev * mask and return a spurious interrupt. This case can happen 546 5084 johnlev * when an interrupt delivery is racing with the removal of 547 5084 johnlev * of the service routine for that interrupt. 548 5084 johnlev */ 549 5084 johnlev ec_unmask_irq(intno); 550 5084 johnlev newipl = -1; /* flag spurious interrupt */ 551 5084 johnlev } else if (newipl <= cpu->cpu_pri) { 552 5084 johnlev /* 553 5084 johnlev * (newipl <= cpu->cpu_pri) means that we must be trying to 554 5084 johnlev * service a vector that was shared with a higher priority 555 5084 johnlev * isr. The higher priority handler has been removed and 556 5084 johnlev * we need to service this int. We can't return a lower 557 5084 johnlev * priority than current cpu priority. Just synthesize a 558 5084 johnlev * priority to return that should be acceptable. 559 10453 Stuart * It should never happen that we synthesize a priority that 560 10453 Stuart * moves us from low-priority to high-priority that would make 561 10453 Stuart * a us incorrectly run on the high priority stack. 562 5084 johnlev */ 563 5084 johnlev newipl = cpu->cpu_pri + 1; /* synthetic priority */ 564 10453 Stuart ASSERT(newipl != LOCK_LEVEL + 1); 565 5084 johnlev } 566 5084 johnlev return (newipl); 567 5084 johnlev } 568 5084 johnlev 569 5084 johnlev 570 5084 johnlev /* 571 5084 johnlev * xen_psm_intr_exit() restores the old interrupt 572 5084 johnlev * priority level after processing an interrupt. 573 5084 johnlev * It is called with interrupts disabled, and does not enable interrupts. 574 5084 johnlev */ 575 5084 johnlev /* ARGSUSED */ 576 5084 johnlev static void 577 5084 johnlev xen_psm_intr_exit(int ipl, int vector) 578 5084 johnlev { 579 5084 johnlev ec_try_unmask_irq(vector); 580 5084 johnlev xen_psm_setspl(ipl); 581 5084 johnlev } 582 5084 johnlev 583 5084 johnlev intr_exit_fn_t 584 5084 johnlev psm_intr_exit_fn(void) 585 5084 johnlev { 586 5084 johnlev return (xen_psm_intr_exit); 587 5084 johnlev } 588 5084 johnlev 589 5084 johnlev /* 590 5084 johnlev * Check if new ipl level allows delivery of previously unserviced events 591 5084 johnlev */ 592 5084 johnlev static void 593 5084 johnlev xen_psm_setspl(int ipl) 594 5084 johnlev { 595 5084 johnlev struct cpu *cpu = CPU; 596 5084 johnlev volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info; 597 5084 johnlev uint16_t pending; 598 5084 johnlev 599 5084 johnlev ASSERT(vci->evtchn_upcall_mask != 0); 600 5084 johnlev 601 5084 johnlev /* 602 5084 johnlev * If new ipl level will enable any pending interrupts, setup so the 603 5084 johnlev * upcoming sti will cause us to get an upcall. 604 5084 johnlev */ 605 5084 johnlev pending = cpu->cpu_m.mcpu_intr_pending & ~((1 << (ipl + 1)) - 1); 606 5084 johnlev if (pending) { 607 5084 johnlev int i; 608 5084 johnlev ulong_t pending_sels = 0; 609 5084 johnlev volatile ulong_t *selp; 610 5084 johnlev struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend; 611 5084 johnlev 612 5084 johnlev for (i = bsrw_insn(pending); i > ipl; i--) 613 5084 johnlev pending_sels |= cpe->pending_sel[i]; 614 5084 johnlev ASSERT(pending_sels); 615 5084 johnlev selp = (volatile ulong_t *)&vci->evtchn_pending_sel; 616 5084 johnlev atomic_or_ulong(selp, pending_sels); 617 5084 johnlev vci->evtchn_upcall_pending = 1; 618 5084 johnlev } 619 5084 johnlev } 620 5084 johnlev 621 5084 johnlev /* 622 5084 johnlev * This function provides external interface to the nexus for all 623 5084 johnlev * functionality related to the new DDI interrupt framework. 624 5084 johnlev * 625 5084 johnlev * Input: 626 5084 johnlev * dip - pointer to the dev_info structure of the requested device 627 5084 johnlev * hdlp - pointer to the internal interrupt handle structure for the 628 5084 johnlev * requested interrupt 629 5084 johnlev * intr_op - opcode for this call 630 5084 johnlev * result - pointer to the integer that will hold the result to be 631 5084 johnlev * passed back if return value is PSM_SUCCESS 632 5084 johnlev * 633 5084 johnlev * Output: 634 5084 johnlev * return value is either PSM_SUCCESS or PSM_FAILURE 635 5084 johnlev */ 636 5084 johnlev int 637 5084 johnlev xen_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp, 638 5084 johnlev psm_intr_op_t intr_op, int *result) 639 5084 johnlev { 640 5084 johnlev int cap; 641 5084 johnlev int err; 642 5084 johnlev int new_priority; 643 5084 johnlev apic_irq_t *irqp; 644 5084 johnlev struct intrspec *ispec; 645 5084 johnlev 646 5084 johnlev DDI_INTR_IMPLDBG((CE_CONT, "xen_intr_ops: dip: %p hdlp: %p " 647 5084 johnlev "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op)); 648 5084 johnlev 649 5084 johnlev switch (intr_op) { 650 5084 johnlev case PSM_INTR_OP_CHECK_MSI: 651 10175 Stuart /* 652 10175 Stuart * Till PCI passthru is supported, only dom0 has MSI/MSIX 653 10175 Stuart */ 654 5084 johnlev if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 655 5084 johnlev *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 656 5084 johnlev DDI_INTR_TYPE_MSIX); 657 5084 johnlev break; 658 5084 johnlev } 659 5084 johnlev /* 660 5084 johnlev * Check MSI/X is supported or not at APIC level and 661 5084 johnlev * masked off the MSI/X bits in hdlp->ih_type if not 662 5084 johnlev * supported before return. If MSI/X is supported, 663 5084 johnlev * leave the ih_type unchanged and return. 664 5084 johnlev * 665 5084 johnlev * hdlp->ih_type passed in from the nexus has all the 666 5084 johnlev * interrupt types supported by the device. 667 5084 johnlev */ 668 5084 johnlev if (xen_support_msi == 0) { 669 5084 johnlev /* 670 5084 johnlev * if xen_support_msi is not set, call 671 5084 johnlev * apic_check_msi_support() to check whether msi 672 5084 johnlev * is supported first 673 5084 johnlev */ 674 5084 johnlev if (apic_check_msi_support() == PSM_SUCCESS) 675 5084 johnlev xen_support_msi = 1; 676 5084 johnlev else 677 5084 johnlev xen_support_msi = -1; 678 5084 johnlev } 679 5084 johnlev if (xen_support_msi == 1) 680 5084 johnlev *result = hdlp->ih_type; 681 5084 johnlev else 682 5084 johnlev *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 683 5084 johnlev DDI_INTR_TYPE_MSIX); 684 5084 johnlev break; 685 5084 johnlev case PSM_INTR_OP_ALLOC_VECTORS: 686 10175 Stuart if (hdlp->ih_type == DDI_INTR_TYPE_MSI) 687 10175 Stuart *result = apic_alloc_msi_vectors(dip, hdlp->ih_inum, 688 10175 Stuart hdlp->ih_scratch1, hdlp->ih_pri, 689 10175 Stuart (int)(uintptr_t)hdlp->ih_scratch2); 690 10175 Stuart else 691 10175 Stuart *result = apic_alloc_msix_vectors(dip, hdlp->ih_inum, 692 10175 Stuart hdlp->ih_scratch1, hdlp->ih_pri, 693 10175 Stuart (int)(uintptr_t)hdlp->ih_scratch2); 694 5084 johnlev break; 695 5084 johnlev case PSM_INTR_OP_FREE_VECTORS: 696 5084 johnlev apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1, 697 5084 johnlev hdlp->ih_pri, hdlp->ih_type); 698 5084 johnlev break; 699 5084 johnlev case PSM_INTR_OP_NAVAIL_VECTORS: 700 5084 johnlev /* 701 5084 johnlev * XXPV - maybe we should make this be: 702 5084 johnlev * min(APIC_VECTOR_PER_IPL, count of all avail vectors); 703 5084 johnlev */ 704 5084 johnlev if (DOMAIN_IS_INITDOMAIN(xen_info)) 705 5084 johnlev *result = APIC_VECTOR_PER_IPL; 706 5084 johnlev else 707 5084 johnlev *result = 1; 708 5084 johnlev break; 709 5084 johnlev case PSM_INTR_OP_XLATE_VECTOR: 710 5084 johnlev ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp; 711 5084 johnlev if (ispec->intrspec_vec >= PIRQ_BASE && 712 5084 johnlev ispec->intrspec_vec < NR_PIRQS && 713 5084 johnlev DOMAIN_IS_INITDOMAIN(xen_info)) { 714 5084 johnlev *result = apic_introp_xlate(dip, ispec, hdlp->ih_type); 715 5084 johnlev } else { 716 5084 johnlev *result = ispec->intrspec_vec; 717 5084 johnlev } 718 5084 johnlev break; 719 5084 johnlev case PSM_INTR_OP_GET_PENDING: 720 5084 johnlev /* XXPV - is this enough for dom0 or do we need to ref ioapic */ 721 5084 johnlev *result = ec_pending_irq(hdlp->ih_vector); 722 5084 johnlev break; 723 5084 johnlev case PSM_INTR_OP_CLEAR_MASK: 724 5084 johnlev /* XXPV - is this enough for dom0 or do we need to set ioapic */ 725 5084 johnlev if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 726 5084 johnlev return (PSM_FAILURE); 727 5084 johnlev ec_enable_irq(hdlp->ih_vector); 728 5084 johnlev break; 729 5084 johnlev case PSM_INTR_OP_SET_MASK: 730 5084 johnlev /* XXPV - is this enough for dom0 or do we need to set ioapic */ 731 5084 johnlev if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 732 5084 johnlev return (PSM_FAILURE); 733 5084 johnlev ec_disable_irq(hdlp->ih_vector); 734 5084 johnlev break; 735 5084 johnlev case PSM_INTR_OP_GET_CAP: 736 5084 johnlev cap = DDI_INTR_FLAG_PENDING | DDI_INTR_FLAG_EDGE; 737 5084 johnlev if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) 738 5084 johnlev cap |= DDI_INTR_FLAG_MASKABLE; 739 5084 johnlev *result = cap; 740 5084 johnlev break; 741 5084 johnlev case PSM_INTR_OP_GET_SHARED: 742 5084 johnlev if (DOMAIN_IS_INITDOMAIN(xen_info)) { 743 5084 johnlev if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 744 5084 johnlev return (PSM_FAILURE); 745 10190 Sophia ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp; 746 5084 johnlev if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) 747 5084 johnlev == NULL) 748 5084 johnlev return (PSM_FAILURE); 749 10190 Sophia *result = (irqp->airq_share > 1) ? 1: 0; 750 5084 johnlev } else { 751 5084 johnlev return (PSM_FAILURE); 752 5084 johnlev } 753 5084 johnlev break; 754 5084 johnlev case PSM_INTR_OP_SET_PRI: 755 5084 johnlev new_priority = *(int *)result; 756 5084 johnlev err = ec_set_irq_priority(hdlp->ih_vector, new_priority); 757 5084 johnlev if (err != 0) 758 5084 johnlev return (PSM_FAILURE); 759 5084 johnlev break; 760 5084 johnlev case PSM_INTR_OP_GET_INTR: 761 5084 johnlev if (!DOMAIN_IS_INITDOMAIN(xen_info)) 762 5084 johnlev return (PSM_FAILURE); 763 5084 johnlev /* 764 5084 johnlev * The interrupt handle given here has been allocated 765 5084 johnlev * specifically for this command, and ih_private carries 766 5084 johnlev * a pointer to a apic_get_intr_t. 767 5084 johnlev */ 768 5084 johnlev if (apic_get_vector_intr_info( 769 5084 johnlev hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS) 770 5084 johnlev return (PSM_FAILURE); 771 5084 johnlev break; 772 5084 johnlev case PSM_INTR_OP_SET_CAP: 773 5084 johnlev /* FALLTHRU */ 774 5084 johnlev default: 775 5084 johnlev return (PSM_FAILURE); 776 5084 johnlev } 777 5084 johnlev return (PSM_SUCCESS); 778 5084 johnlev } 779 5084 johnlev 780 5084 johnlev static void 781 5084 johnlev xen_psm_rebind_irq(int irq) 782 5084 johnlev { 783 5084 johnlev cpuset_t ncpu; 784 5084 johnlev processorid_t newcpu; 785 5529 smaybe apic_irq_t *irqptr; 786 5084 johnlev 787 5084 johnlev newcpu = xen_psm_bind_intr(irq); 788 5084 johnlev if (newcpu == IRQ_UNBOUND) { 789 5084 johnlev CPUSET_ZERO(ncpu); 790 5084 johnlev CPUSET_OR(ncpu, xen_psm_cpus_online); 791 5084 johnlev } else { 792 5084 johnlev CPUSET_ONLY(ncpu, newcpu & ~IRQ_USER_BOUND); 793 5084 johnlev } 794 5084 johnlev ec_set_irq_affinity(irq, ncpu); 795 5573 smaybe if (irq <= APIC_MAX_VECTOR) { 796 5573 smaybe irqptr = apic_irq_table[irq]; 797 5573 smaybe ASSERT(irqptr != NULL); 798 5573 smaybe irqptr->airq_temp_cpu = (uchar_t)newcpu; 799 5573 smaybe } 800 5084 johnlev } 801 5084 johnlev 802 5084 johnlev /* 803 5084 johnlev * Disable all device interrupts for the given cpu. 804 5084 johnlev * High priority interrupts are not disabled and will still be serviced. 805 5084 johnlev */ 806 5084 johnlev static int 807 5084 johnlev xen_psm_disable_intr(processorid_t cpun) 808 5084 johnlev { 809 5084 johnlev int irq; 810 5084 johnlev 811 5084 johnlev /* 812 5084 johnlev * Can't offline VCPU 0 on this hypervisor. There's no reason 813 5084 johnlev * anyone would want to given that the CPUs are virtual. Also note 814 5084 johnlev * that the hypervisor requires suspend/resume to be on VCPU 0. 815 5084 johnlev */ 816 5084 johnlev if (cpun == 0) 817 5084 johnlev return (PSM_FAILURE); 818 5084 johnlev 819 5529 smaybe CPUSET_ATOMIC_DEL(xen_psm_cpus_online, cpun); 820 5084 johnlev for (irq = 0; irq < NR_IRQS; irq++) { 821 5084 johnlev if (!ec_irq_needs_rebind(irq, cpun)) 822 5084 johnlev continue; 823 5084 johnlev xen_psm_rebind_irq(irq); 824 5084 johnlev } 825 5084 johnlev return (PSM_SUCCESS); 826 5084 johnlev } 827 5084 johnlev 828 5084 johnlev static void 829 5084 johnlev xen_psm_enable_intr(processorid_t cpun) 830 5084 johnlev { 831 5084 johnlev int irq; 832 5084 johnlev 833 5084 johnlev if (cpun == 0) 834 5084 johnlev return; 835 5084 johnlev 836 5529 smaybe CPUSET_ATOMIC_ADD(xen_psm_cpus_online, cpun); 837 5084 johnlev 838 5084 johnlev /* 839 5084 johnlev * Rebalance device interrupts among online processors 840 5084 johnlev */ 841 5084 johnlev for (irq = 0; irq < NR_IRQS; irq++) { 842 5084 johnlev if (!ec_irq_rebindable(irq)) 843 5084 johnlev continue; 844 5084 johnlev xen_psm_rebind_irq(irq); 845 5084 johnlev } 846 6749 sherrym 847 6749 sherrym if (DOMAIN_IS_INITDOMAIN(xen_info)) { 848 6749 sherrym apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE; 849 6749 sherrym } 850 5529 smaybe } 851 5529 smaybe 852 5529 smaybe static int 853 5529 smaybe xen_psm_post_cpu_start() 854 5529 smaybe { 855 5529 smaybe processorid_t cpun; 856 5529 smaybe 857 6467 smaybe cpun = psm_get_cpu_id(); 858 5529 smaybe if (DOMAIN_IS_INITDOMAIN(xen_info)) { 859 7113 bholler /* 860 7113 bholler * Non-virtualized environments can call psm_post_cpu_start 861 7113 bholler * from Suspend/Resume with the APIC_CPU_INTR_ENABLE bit set. 862 7113 bholler * xen_psm_post_cpu_start() is only called from boot. 863 7113 bholler */ 864 7113 bholler apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE; 865 5529 smaybe } 866 5529 smaybe return (PSM_SUCCESS); 867 5084 johnlev } 868 5084 johnlev 869 5084 johnlev /* 870 5084 johnlev * This function will reprogram the timer. 871 5084 johnlev * 872 5084 johnlev * When in oneshot mode the argument is the absolute time in future at which to 873 5084 johnlev * generate the interrupt. 874 5084 johnlev * 875 5084 johnlev * When in periodic mode, the argument is the interval at which the 876 5084 johnlev * interrupts should be generated. There is no need to support the periodic 877 5084 johnlev * mode timer change at this time. 878 5084 johnlev * 879 5084 johnlev * Note that we must be careful to convert from hrtime to Xen system time (see 880 5084 johnlev * xpv_timestamp.c). 881 5084 johnlev */ 882 5084 johnlev static void 883 5084 johnlev xen_psm_timer_reprogram(hrtime_t timer_req) 884 5084 johnlev { 885 5084 johnlev hrtime_t now, timer_new, time_delta, xen_time; 886 5084 johnlev ulong_t flags; 887 5084 johnlev 888 5084 johnlev flags = intr_clear(); 889 5084 johnlev /* 890 5084 johnlev * We should be called from high PIL context (CBE_HIGH_PIL), 891 5084 johnlev * so kpreempt is disabled. 892 5084 johnlev */ 893 5084 johnlev 894 5084 johnlev now = xpv_gethrtime(); 895 5084 johnlev xen_time = xpv_getsystime(); 896 5084 johnlev if (timer_req <= now) { 897 5084 johnlev /* 898 5084 johnlev * requested to generate an interrupt in the past 899 5084 johnlev * generate an interrupt as soon as possible 900 5084 johnlev */ 901 5084 johnlev time_delta = XEN_NSEC_PER_TICK; 902 5084 johnlev } else 903 5084 johnlev time_delta = timer_req - now; 904 5084 johnlev 905 5084 johnlev timer_new = xen_time + time_delta; 906 5084 johnlev if (HYPERVISOR_set_timer_op(timer_new) != 0) 907 5084 johnlev panic("can't set hypervisor timer?"); 908 5084 johnlev intr_restore(flags); 909 5084 johnlev } 910 5084 johnlev 911 5084 johnlev /* 912 5084 johnlev * This function will enable timer interrupts. 913 5084 johnlev */ 914 5084 johnlev static void 915 5084 johnlev xen_psm_timer_enable(void) 916 5084 johnlev { 917 5084 johnlev ec_unmask_irq(xen_clock_irq); 918 5084 johnlev } 919 5084 johnlev 920 5084 johnlev /* 921 5084 johnlev * This function will disable timer interrupts on the current cpu. 922 5084 johnlev */ 923 5084 johnlev static void 924 5084 johnlev xen_psm_timer_disable(void) 925 5084 johnlev { 926 5084 johnlev (void) ec_block_irq(xen_clock_irq); 927 5084 johnlev /* 928 5084 johnlev * If the clock irq is pending on this cpu then we need to 929 5084 johnlev * clear the pending interrupt. 930 5084 johnlev */ 931 5084 johnlev ec_unpend_irq(xen_clock_irq); 932 5084 johnlev } 933 5084 johnlev 934 5084 johnlev /* 935 5084 johnlev * 936 5084 johnlev * The following functions are in the platform specific file so that they 937 5084 johnlev * can be different functions depending on whether we are running on 938 5084 johnlev * bare metal or a hypervisor. 939 5084 johnlev */ 940 5084 johnlev 941 5084 johnlev /* 942 5084 johnlev * Allocate a free vector for irq at ipl. 943 5084 johnlev */ 944 5084 johnlev /* ARGSUSED */ 945 5084 johnlev uchar_t 946 5084 johnlev apic_allocate_vector(int ipl, int irq, int pri) 947 5084 johnlev { 948 5084 johnlev physdev_irq_t irq_op; 949 5084 johnlev uchar_t vector; 950 10175 Stuart int rc; 951 5084 johnlev 952 5084 johnlev irq_op.irq = irq; 953 5084 johnlev 954 10175 Stuart if ((rc = HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) 955 10175 Stuart != 0) 956 10175 Stuart panic("Hypervisor alloc vector failed err: %d", -rc); 957 5084 johnlev vector = irq_op.vector; 958 5084 johnlev /* 959 5084 johnlev * No need to worry about vector colliding with our reserved vectors 960 5084 johnlev * e.g. T_FASTTRAP, xen can differentiate between hardware and software 961 5084 johnlev * generated traps and handle them properly. 962 5084 johnlev */ 963 5084 johnlev apic_vector_to_irq[vector] = (uchar_t)irq; 964 5084 johnlev return (vector); 965 5084 johnlev } 966 5084 johnlev 967 5084 johnlev /* Mark vector as not being used by any irq */ 968 5084 johnlev void 969 5084 johnlev apic_free_vector(uchar_t vector) 970 5084 johnlev { 971 5084 johnlev apic_vector_to_irq[vector] = APIC_RESV_IRQ; 972 5084 johnlev } 973 5084 johnlev 974 5084 johnlev /* 975 10175 Stuart * This function returns the no. of vectors available for the pri. 976 10175 Stuart * dip is not used at this moment. If we really don't need that, 977 10175 Stuart * it will be removed. Since priority is not limited by hardware 978 10175 Stuart * when running on the hypervisor we simply return the maximum no. 979 10175 Stuart * of available contiguous vectors. 980 10175 Stuart */ 981 10175 Stuart /*ARGSUSED*/ 982 10175 Stuart int 983 10175 Stuart apic_navail_vector(dev_info_t *dip, int pri) 984 10175 Stuart { 985 10175 Stuart int lowest, highest, i, navail, count; 986 10175 Stuart 987 10175 Stuart DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n", 988 10175 Stuart (void *)dip, pri)); 989 10175 Stuart 990 10175 Stuart highest = APIC_MAX_VECTOR; 991 10175 Stuart lowest = APIC_BASE_VECT; 992 10175 Stuart navail = count = 0; 993 10175 Stuart 994 10175 Stuart /* It has to be contiguous */ 995 10175 Stuart for (i = lowest; i < highest; i++) { 996 10175 Stuart count = 0; 997 10175 Stuart while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) && 998 10175 Stuart (i < highest)) { 999 10175 Stuart count++; 1000 10175 Stuart i++; 1001 10175 Stuart } 1002 10175 Stuart if (count > navail) 1003 10175 Stuart navail = count; 1004 10175 Stuart } 1005 10175 Stuart return (navail); 1006 10175 Stuart } 1007 10175 Stuart 1008 10175 Stuart static physdev_manage_pci_t *managed_devlist; 1009 10175 Stuart static int mdev_cnt; 1010 10175 Stuart static int mdev_size = 128; 1011 10175 Stuart static uchar_t msi_vector_to_pirq[APIC_MAX_VECTOR+1]; 1012 10175 Stuart 1013 10175 Stuart /* 1014 10175 Stuart * Add devfn on given bus to devices managed by hypervisor 1015 5084 johnlev */ 1016 5084 johnlev static int 1017 10175 Stuart xen_manage_device(uint8_t bus, uint8_t devfn) 1018 10175 Stuart { 1019 10175 Stuart physdev_manage_pci_t manage_pci, *newlist; 1020 10175 Stuart int rc, i, oldsize; 1021 10175 Stuart 1022 10175 Stuart /* 1023 10175 Stuart * Check if bus/devfn already managed. If so just return success. 1024 10175 Stuart */ 1025 10175 Stuart if (managed_devlist == NULL) { 1026 10175 Stuart managed_devlist = kmem_alloc(sizeof (physdev_manage_pci_t) * 1027 10175 Stuart mdev_size, KM_NOSLEEP); 1028 10175 Stuart if (managed_devlist == NULL) { 1029 10175 Stuart cmn_err(CE_WARN, 1030 10175 Stuart "Can't alloc space for managed device list"); 1031 10175 Stuart return (0); 1032 10175 Stuart } 1033 10175 Stuart }; 1034 10175 Stuart for (i = 0; i < mdev_cnt; i++) { 1035 10175 Stuart if (managed_devlist[i].bus == bus && 1036 10175 Stuart managed_devlist[i].devfn == devfn) 1037 10175 Stuart return (1); /* device already managed */ 1038 10175 Stuart } 1039 10175 Stuart manage_pci.bus = bus; 1040 10175 Stuart manage_pci.devfn = devfn; 1041 10175 Stuart rc = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci); 1042 10175 Stuart if (rc < 0) { 1043 10175 Stuart cmn_err(CE_WARN, 1044 10175 Stuart "hypervisor add pci device call failed bus:0x%x" 1045 10175 Stuart " devfn:0x%x", bus, devfn); 1046 10175 Stuart return (0); 1047 10175 Stuart } 1048 10175 Stuart /* 1049 10175 Stuart * Add device to the managed device list 1050 10175 Stuart */ 1051 10175 Stuart if (i == mdev_size) { 1052 10175 Stuart /* 1053 10175 Stuart * grow the managed device list 1054 10175 Stuart */ 1055 10175 Stuart oldsize = mdev_size * sizeof (physdev_manage_pci_t); 1056 10175 Stuart mdev_size *= 2; 1057 10175 Stuart newlist = kmem_alloc(sizeof (physdev_manage_pci_t) * mdev_size, 1058 10175 Stuart KM_NOSLEEP); 1059 10175 Stuart if (newlist == NULL) { 1060 10175 Stuart cmn_err(CE_WARN, "Can't grow managed device list"); 1061 10175 Stuart return (0); 1062 10175 Stuart } 1063 10175 Stuart bcopy(managed_devlist, newlist, oldsize); 1064 10175 Stuart kmem_free(managed_devlist, oldsize); 1065 10175 Stuart managed_devlist = newlist; 1066 10175 Stuart } 1067 10175 Stuart managed_devlist[i].bus = bus; 1068 10175 Stuart managed_devlist[i].devfn = devfn; 1069 10175 Stuart mdev_cnt++; 1070 10175 Stuart return (1); 1071 10175 Stuart } 1072 10175 Stuart 1073 10175 Stuart /* 1074 10175 Stuart * allocate an apic irq struct for an MSI interrupt 1075 10175 Stuart */ 1076 10175 Stuart static int 1077 10175 Stuart msi_allocate_irq(int irq) 1078 10175 Stuart { 1079 10175 Stuart apic_irq_t *irqptr = apic_irq_table[irq]; 1080 10175 Stuart 1081 10175 Stuart if (irqptr == NULL) { 1082 10175 Stuart irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 1083 10175 Stuart if (irqptr == NULL) { 1084 10175 Stuart cmn_err(CE_WARN, "xpv_psm: NO memory to allocate IRQ"); 1085 10175 Stuart return (-1); 1086 10175 Stuart } 1087 10175 Stuart apic_irq_table[irq] = irqptr; 1088 10175 Stuart } else { 1089 10175 Stuart if (irq == APIC_RESV_IRQ && irqptr->airq_mps_intr_index == 0) 1090 10175 Stuart irqptr->airq_mps_intr_index = FREE_INDEX; 1091 10175 Stuart if (irqptr->airq_mps_intr_index != FREE_INDEX) { 1092 10175 Stuart cmn_err(CE_WARN, "xpv_psm: MSI IRQ already in use"); 1093 10175 Stuart return (-1); 1094 10175 Stuart } 1095 10175 Stuart } 1096 10175 Stuart irqptr->airq_mps_intr_index = FREE_INDEX; 1097 10175 Stuart return (irq); 1098 10175 Stuart } 1099 10175 Stuart 1100 10175 Stuart /* 1101 10175 Stuart * read MSI/MSIX vector out of config space 1102 10175 Stuart */ 1103 10175 Stuart static uchar_t 1104 10175 Stuart xpv_psm_get_msi_vector(dev_info_t *dip, int type, int entry) 1105 10175 Stuart { 1106 10175 Stuart uint64_t msi_data = 0; 1107 10175 Stuart int cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip); 1108 10175 Stuart ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(dip); 1109 10175 Stuart ushort_t msi_ctrl; 1110 10175 Stuart uchar_t vector; 1111 10175 Stuart 1112 10175 Stuart ASSERT((handle != NULL) && (cap_ptr != 0)); 1113 10175 Stuart if (type == DDI_INTR_TYPE_MSI) { 1114 10175 Stuart msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL); 1115 10175 Stuart /* 1116 10175 Stuart * Get vector 1117 10175 Stuart */ 1118 10175 Stuart if (msi_ctrl & PCI_MSI_64BIT_MASK) { 1119 10175 Stuart msi_data = pci_config_get16(handle, 1120 10175 Stuart cap_ptr + PCI_MSI_64BIT_DATA); 1121 10175 Stuart } else { 1122 10175 Stuart msi_data = pci_config_get16(handle, 1123 10175 Stuart cap_ptr + PCI_MSI_32BIT_DATA); 1124 10175 Stuart } 1125 11188 Frank vector = (msi_data & 0xff) + entry; 1126 10175 Stuart } else if (type == DDI_INTR_TYPE_MSIX) { 1127 10175 Stuart uintptr_t off; 1128 10175 Stuart ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip); 1129 10175 Stuart 1130 10175 Stuart /* Offset into the given entry in the MSI-X table */ 1131 10175 Stuart off = (uintptr_t)msix_p->msix_tbl_addr + 1132 10175 Stuart (entry * PCI_MSIX_VECTOR_SIZE); 1133 10175 Stuart 1134 10175 Stuart msi_data = ddi_get32(msix_p->msix_tbl_hdl, 1135 10175 Stuart (uint32_t *)(off + PCI_MSIX_DATA_OFFSET)); 1136 11188 Frank vector = msi_data & 0xff; 1137 10175 Stuart } 1138 10175 Stuart return (vector); 1139 10175 Stuart } 1140 10175 Stuart 1141 10175 Stuart 1142 10175 Stuart static void 1143 10175 Stuart get_busdevfn(dev_info_t *dip, int *busp, int *devfnp) 1144 10175 Stuart { 1145 10175 Stuart pci_regspec_t *regspec; 1146 10175 Stuart int reglen; 1147 10175 Stuart 1148 10175 Stuart /* 1149 10175 Stuart * Get device reg spec, first word has PCI bus and 1150 10175 Stuart * device/function info we need. 1151 10175 Stuart */ 1152 10175 Stuart if (ddi_getlongprop(DDI_DEV_T_NONE, dip, DDI_PROP_DONTPASS, "reg", 1153 10175 Stuart (caddr_t)®spec, ®len) != DDI_SUCCESS) { 1154 10175 Stuart cmn_err(CE_WARN, 1155 10175 Stuart "get_busdevfn() failed to get regspec."); 1156 10175 Stuart return; 1157 10175 Stuart } 1158 10175 Stuart /* 1159 10175 Stuart * get PCI bus # from reg spec for device 1160 10175 Stuart */ 1161 10175 Stuart *busp = PCI_REG_BUS_G(regspec[0].pci_phys_hi); 1162 10175 Stuart /* 1163 10175 Stuart * get combined device/function from reg spec for device. 1164 10175 Stuart */ 1165 10175 Stuart *devfnp = (regspec[0].pci_phys_hi & (PCI_REG_FUNC_M | PCI_REG_DEV_M)) >> 1166 10175 Stuart PCI_REG_FUNC_SHIFT; 1167 10175 Stuart 1168 10175 Stuart kmem_free(regspec, reglen); 1169 10175 Stuart } 1170 10175 Stuart 1171 10175 Stuart /* 1172 10175 Stuart * This function allocates "count" MSI vector(s) for the given "dip/pri/type" 1173 10175 Stuart */ 1174 10175 Stuart int 1175 10175 Stuart apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri, 1176 5084 johnlev int behavior) 1177 5084 johnlev { 1178 10175 Stuart int rcount, i, rc, irqno; 1179 5084 johnlev uchar_t vector, cpu; 1180 5084 johnlev major_t major; 1181 5084 johnlev apic_irq_t *irqptr; 1182 10175 Stuart physdev_map_pirq_t map_irq; 1183 10175 Stuart int busnum, devfn; 1184 5084 johnlev 1185 10175 Stuart DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: dip=0x%p " 1186 5084 johnlev "inum=0x%x pri=0x%x count=0x%x behavior=%d\n", 1187 10175 Stuart (void *)dip, inum, pri, count, behavior)); 1188 5084 johnlev 1189 5084 johnlev if (count > 1) { 1190 5084 johnlev if (behavior == DDI_INTR_ALLOC_STRICT && 1191 8925 Evan apic_multi_msi_enable == 0) 1192 5084 johnlev return (0); 1193 5084 johnlev if (apic_multi_msi_enable == 0) 1194 5084 johnlev count = 1; 1195 5084 johnlev } 1196 5084 johnlev 1197 10175 Stuart if ((rcount = apic_navail_vector(dip, pri)) > count) 1198 10175 Stuart rcount = count; 1199 10175 Stuart else if (rcount == 0 || (rcount < count && 1200 10175 Stuart behavior == DDI_INTR_ALLOC_STRICT)) 1201 10175 Stuart return (0); 1202 10175 Stuart 1203 10175 Stuart /* if not ISP2, then round it down */ 1204 10175 Stuart if (!ISP2(rcount)) 1205 10175 Stuart rcount = 1 << (highbit(rcount) - 1); 1206 10175 Stuart 1207 5084 johnlev /* 1208 10175 Stuart * get PCI bus # and devfn from reg spec for device 1209 5084 johnlev */ 1210 10175 Stuart get_busdevfn(dip, &busnum, &devfn); 1211 10175 Stuart 1212 10175 Stuart /* 1213 10175 Stuart * Tell xen about this pci device 1214 10175 Stuart */ 1215 10175 Stuart if (!xen_manage_device(busnum, devfn)) 1216 10175 Stuart return (0); 1217 5084 johnlev 1218 5084 johnlev mutex_enter(&airq_mutex); 1219 5084 johnlev 1220 10175 Stuart major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 1221 5084 johnlev for (i = 0; i < rcount; i++) { 1222 10175 Stuart /* 1223 10175 Stuart * use PHYSDEVOP_map_pirq to have xen map MSI to a pirq 1224 10175 Stuart */ 1225 10175 Stuart map_irq.domid = DOMID_SELF; 1226 10175 Stuart map_irq.type = MAP_PIRQ_TYPE_MSI; 1227 11188 Frank map_irq.index = -rcount; /* hypervisor auto allocates vectors */ 1228 10175 Stuart map_irq.pirq = -1; 1229 10175 Stuart map_irq.bus = busnum; 1230 10175 Stuart map_irq.devfn = devfn; 1231 11188 Frank map_irq.entry_nr = i; 1232 10175 Stuart map_irq.table_base = 0; 1233 10175 Stuart rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); 1234 10175 Stuart irqno = map_irq.pirq; 1235 10175 Stuart if (rc < 0) { 1236 5084 johnlev mutex_exit(&airq_mutex); 1237 10175 Stuart cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc); 1238 11188 Frank return (i); 1239 5084 johnlev } 1240 10175 Stuart if (irqno < 0) { 1241 10175 Stuart mutex_exit(&airq_mutex); 1242 10175 Stuart cmn_err(CE_NOTE, 1243 10175 Stuart "!hypervisor not configured for MSI support"); 1244 10175 Stuart xen_support_msi = -1; 1245 10175 Stuart return (0); 1246 10175 Stuart } 1247 11188 Frank 1248 10175 Stuart /* 1249 10175 Stuart * Find out what vector the hypervisor assigned 1250 10175 Stuart */ 1251 11188 Frank vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSI, i); 1252 11188 Frank 1253 11188 Frank if (msi_allocate_irq(irqno) < 0) { 1254 11188 Frank mutex_exit(&airq_mutex); 1255 11188 Frank return (i); 1256 11188 Frank } 1257 5084 johnlev apic_max_device_irq = max(irqno, apic_max_device_irq); 1258 5084 johnlev apic_min_device_irq = min(irqno, apic_min_device_irq); 1259 5084 johnlev irqptr = apic_irq_table[irqno]; 1260 10175 Stuart ASSERT(irqptr != NULL); 1261 5084 johnlev #ifdef DEBUG 1262 5084 johnlev if (apic_vector_to_irq[vector] != APIC_RESV_IRQ) 1263 10175 Stuart DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: " 1264 5084 johnlev "apic_vector_to_irq is not APIC_RESV_IRQ\n")); 1265 5084 johnlev #endif 1266 10175 Stuart apic_vector_to_irq[vector] = (uchar_t)irqno; 1267 10175 Stuart msi_vector_to_pirq[vector] = (uchar_t)irqno; 1268 5084 johnlev 1269 5084 johnlev irqptr->airq_vector = vector; 1270 5084 johnlev irqptr->airq_ioapicindex = (uchar_t)inum; /* start */ 1271 5084 johnlev irqptr->airq_intin_no = (uchar_t)rcount; 1272 5084 johnlev irqptr->airq_ipl = pri; 1273 5084 johnlev irqptr->airq_origirq = (uchar_t)(inum + i); 1274 5084 johnlev irqptr->airq_share_id = 0; 1275 5084 johnlev irqptr->airq_mps_intr_index = MSI_INDEX; 1276 5084 johnlev irqptr->airq_dip = dip; 1277 5084 johnlev irqptr->airq_major = major; 1278 10175 Stuart if (i == 0) /* they all bind to the same cpu */ 1279 10175 Stuart cpu = irqptr->airq_cpu = xen_psm_bind_intr(irqno); 1280 5084 johnlev else 1281 5084 johnlev irqptr->airq_cpu = cpu; 1282 10175 Stuart DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: irq=0x%x " 1283 5084 johnlev "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno, 1284 5084 johnlev (void *)irqptr->airq_dip, irqptr->airq_vector, 1285 5084 johnlev irqptr->airq_origirq, pri)); 1286 5084 johnlev } 1287 5084 johnlev mutex_exit(&airq_mutex); 1288 5084 johnlev return (rcount); 1289 10175 Stuart } 1290 10175 Stuart 1291 10175 Stuart /* 1292 10175 Stuart * This function allocates "count" MSI-X vector(s) for the given "dip/pri/type" 1293 10175 Stuart */ 1294 10175 Stuart int 1295 10175 Stuart apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri, 1296 10175 Stuart int behavior) 1297 10175 Stuart { 1298 10175 Stuart int rcount, i, rc; 1299 10175 Stuart major_t major; 1300 10175 Stuart physdev_map_pirq_t map_irq; 1301 10175 Stuart int busnum, devfn; 1302 10175 Stuart ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip); 1303 10175 Stuart uint64_t table_base; 1304 10175 Stuart pfn_t pfnum; 1305 10175 Stuart 1306 10175 Stuart if (msix_p == NULL) { 1307 10175 Stuart msix_p = pci_msix_init(dip); 1308 10175 Stuart if (msix_p != NULL) { 1309 10175 Stuart i_ddi_set_msix(dip, msix_p); 1310 10175 Stuart } else { 1311 10175 Stuart cmn_err(CE_WARN, "apic_alloc_msix_vectors()" 1312 10175 Stuart " msix_init failed"); 1313 10175 Stuart return (0); 1314 10175 Stuart } 1315 10175 Stuart } 1316 10175 Stuart /* 1317 10323 Stuart * Hypervisor wants PCI config space address of msix table base 1318 10175 Stuart */ 1319 10175 Stuart pfnum = hat_getpfnum(kas.a_hat, (caddr_t)msix_p->msix_tbl_addr) & 1320 10175 Stuart ~PFN_IS_FOREIGN_MFN; 1321 10323 Stuart table_base = (uint64_t)((pfnum << PAGESHIFT) - msix_p->msix_tbl_offset | 1322 10175 Stuart ((uintptr_t)msix_p->msix_tbl_addr & PAGEOFFSET)); 1323 10175 Stuart /* 1324 10175 Stuart * get PCI bus # and devfn from reg spec for device 1325 10175 Stuart */ 1326 10175 Stuart get_busdevfn(dip, &busnum, &devfn); 1327 10175 Stuart 1328 10175 Stuart /* 1329 10175 Stuart * Tell xen about this pci device 1330 10175 Stuart */ 1331 10175 Stuart if (!xen_manage_device(busnum, devfn)) 1332 10175 Stuart return (0); 1333 10175 Stuart mutex_enter(&airq_mutex); 1334 10175 Stuart 1335 10175 Stuart if ((rcount = apic_navail_vector(dip, pri)) > count) 1336 10175 Stuart rcount = count; 1337 10175 Stuart else if (rcount == 0 || (rcount < count && 1338 10175 Stuart behavior == DDI_INTR_ALLOC_STRICT)) { 1339 10175 Stuart rcount = 0; 1340 10175 Stuart goto out; 1341 10175 Stuart } 1342 10175 Stuart 1343 10175 Stuart major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 1344 10175 Stuart for (i = 0; i < rcount; i++) { 1345 10175 Stuart int irqno; 1346 10175 Stuart uchar_t vector; 1347 10175 Stuart apic_irq_t *irqptr; 1348 10175 Stuart 1349 10175 Stuart /* 1350 10175 Stuart * use PHYSDEVOP_map_pirq to have xen map MSI-X to a pirq 1351 10175 Stuart */ 1352 10175 Stuart map_irq.domid = DOMID_SELF; 1353 10175 Stuart map_irq.type = MAP_PIRQ_TYPE_MSI; 1354 10175 Stuart map_irq.index = -1; /* hypervisor auto allocates vector */ 1355 10175 Stuart map_irq.pirq = -1; 1356 10175 Stuart map_irq.bus = busnum; 1357 10175 Stuart map_irq.devfn = devfn; 1358 10175 Stuart map_irq.entry_nr = i; 1359 10175 Stuart map_irq.table_base = table_base; 1360 10175 Stuart rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); 1361 10175 Stuart irqno = map_irq.pirq; 1362 10175 Stuart if (rc < 0) { 1363 10175 Stuart mutex_exit(&airq_mutex); 1364 10175 Stuart cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc); 1365 11188 Frank return (i); 1366 10175 Stuart } 1367 10175 Stuart if (irqno < 0) { 1368 10175 Stuart mutex_exit(&airq_mutex); 1369 10175 Stuart cmn_err(CE_NOTE, 1370 10175 Stuart "!hypervisor not configured for MSI support"); 1371 10175 Stuart xen_support_msi = -1; 1372 10175 Stuart return (0); 1373 10175 Stuart } 1374 10175 Stuart /* 1375 10175 Stuart * Find out what vector the hypervisor assigned 1376 10175 Stuart */ 1377 10175 Stuart vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSIX, i); 1378 11188 Frank 1379 10175 Stuart if (msi_allocate_irq(irqno) < 0) { 1380 10175 Stuart mutex_exit(&airq_mutex); 1381 11188 Frank return (i); 1382 10175 Stuart } 1383 10175 Stuart apic_vector_to_irq[vector] = (uchar_t)irqno; 1384 10175 Stuart msi_vector_to_pirq[vector] = (uchar_t)irqno; 1385 10175 Stuart apic_max_device_irq = max(irqno, apic_max_device_irq); 1386 10175 Stuart apic_min_device_irq = min(irqno, apic_min_device_irq); 1387 10175 Stuart irqptr = apic_irq_table[irqno]; 1388 10175 Stuart ASSERT(irqptr != NULL); 1389 10175 Stuart irqptr->airq_vector = (uchar_t)vector; 1390 10175 Stuart irqptr->airq_ipl = pri; 1391 10175 Stuart irqptr->airq_origirq = (uchar_t)(inum + i); 1392 10175 Stuart irqptr->airq_share_id = 0; 1393 10175 Stuart irqptr->airq_mps_intr_index = MSIX_INDEX; 1394 10175 Stuart irqptr->airq_dip = dip; 1395 10175 Stuart irqptr->airq_major = major; 1396 10175 Stuart irqptr->airq_cpu = IRQ_UNBOUND; /* will be bound when addspl */ 1397 10175 Stuart } 1398 10175 Stuart out: 1399 10175 Stuart mutex_exit(&airq_mutex); 1400 10175 Stuart return (rcount); 1401 10175 Stuart } 1402 10175 Stuart 1403 10175 Stuart 1404 10175 Stuart /* 1405 10175 Stuart * This finds the apic_irq_t associated with the dip, ispec and type. 1406 10175 Stuart * The entry should have already been freed, but it can not have been 1407 10175 Stuart * reused yet since the hypervisor can not have reassigned the pirq since 1408 10175 Stuart * we have not freed that yet. 1409 10175 Stuart */ 1410 10175 Stuart static apic_irq_t * 1411 10175 Stuart msi_find_irq(dev_info_t *dip, struct intrspec *ispec) 1412 10175 Stuart { 1413 10175 Stuart apic_irq_t *irqp; 1414 10175 Stuart int i; 1415 10175 Stuart 1416 10175 Stuart for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 1417 10175 Stuart if ((irqp = apic_irq_table[i]) == NULL) 1418 10175 Stuart continue; 1419 10175 Stuart if ((irqp->airq_dip == dip) && 1420 10175 Stuart (irqp->airq_origirq == ispec->intrspec_vec) && 1421 10175 Stuart (irqp->airq_ipl == ispec->intrspec_pri)) { 1422 10175 Stuart return (irqp); 1423 10175 Stuart } 1424 10175 Stuart } 1425 10175 Stuart return (NULL); 1426 10175 Stuart } 1427 10175 Stuart 1428 10175 Stuart void 1429 10175 Stuart apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type) 1430 10175 Stuart { 1431 10175 Stuart int i, rc; 1432 10175 Stuart physdev_unmap_pirq_t unmap_pirq; 1433 10175 Stuart apic_irq_t *irqptr; 1434 10175 Stuart struct intrspec ispec; 1435 10175 Stuart 1436 10175 Stuart DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x " 1437 10175 Stuart "count: %x pri: %x type: %x\n", 1438 10175 Stuart (void *)dip, inum, count, pri, type)); 1439 10175 Stuart 1440 10175 Stuart /* for MSI/X only */ 1441 10175 Stuart if (!DDI_INTR_IS_MSI_OR_MSIX(type)) 1442 10175 Stuart return; 1443 10175 Stuart 1444 10175 Stuart for (i = 0; i < count; i++) { 1445 10175 Stuart DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x " 1446 10175 Stuart "pri=0x%x count=0x%x\n", inum, pri, count)); 1447 10175 Stuart ispec.intrspec_vec = inum + i; 1448 10175 Stuart ispec.intrspec_pri = pri; 1449 10175 Stuart if ((irqptr = msi_find_irq(dip, &ispec)) == NULL) { 1450 10175 Stuart cmn_err(CE_WARN, 1451 10175 Stuart "couldn't find irq %s,%s dip: 0x%p vec: %x pri: %x", 1452 10175 Stuart ddi_get_name(dip), ddi_get_name_addr(dip), 1453 10175 Stuart (void *)dip, inum + i, pri); 1454 10175 Stuart continue; 1455 10175 Stuart } 1456 10175 Stuart /* 1457 10175 Stuart * use PHYSDEVOP_unmap_pirq to have xen unmap MSI from a pirq 1458 10175 Stuart */ 1459 10175 Stuart unmap_pirq.domid = DOMID_SELF; 1460 10175 Stuart unmap_pirq.pirq = msi_vector_to_pirq[irqptr->airq_vector]; 1461 10175 Stuart rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_pirq); 1462 10175 Stuart if (rc < 0) { 1463 10175 Stuart cmn_err(CE_WARN, "unmap pirq failed"); 1464 10175 Stuart return; 1465 10175 Stuart } 1466 10175 Stuart irqptr->airq_mps_intr_index = FREE_INDEX; 1467 10175 Stuart apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ; 1468 10175 Stuart } 1469 5084 johnlev } 1470 5084 johnlev 1471 5084 johnlev /* 1472 5084 johnlev * The hypervisor doesn't permit access to local apics directly 1473 5084 johnlev */ 1474 5084 johnlev /* ARGSUSED */ 1475 5084 johnlev uint32_t * 1476 5084 johnlev mapin_apic(uint32_t addr, size_t len, int flags) 1477 5084 johnlev { 1478 5084 johnlev /* 1479 5084 johnlev * Return a pointer to a memory area to fake out the 1480 5084 johnlev * probe code that wants to read apic registers. 1481 5084 johnlev * The dummy values will end up being ignored by xen 1482 5084 johnlev * later on when they are used anyway. 1483 5084 johnlev */ 1484 5084 johnlev xen_psm_dummy_apic[APIC_VERS_REG] = APIC_INTEGRATED_VERS; 1485 5084 johnlev return (xen_psm_dummy_apic); 1486 5084 johnlev } 1487 5084 johnlev 1488 5084 johnlev /* ARGSUSED */ 1489 5084 johnlev uint32_t * 1490 5084 johnlev mapin_ioapic(uint32_t addr, size_t len, int flags) 1491 5084 johnlev { 1492 5084 johnlev /* 1493 5084 johnlev * Return non-null here to fake out configure code that calls this. 1494 5084 johnlev * The i86xpv platform will not reference through the returned value.. 1495 5084 johnlev */ 1496 5084 johnlev return ((uint32_t *)0x1); 1497 5084 johnlev } 1498 5084 johnlev 1499 5084 johnlev /* ARGSUSED */ 1500 5084 johnlev void 1501 5084 johnlev mapout_apic(caddr_t addr, size_t len) 1502 5084 johnlev { 1503 5084 johnlev } 1504 5084 johnlev 1505 5084 johnlev /* ARGSUSED */ 1506 5084 johnlev void 1507 5084 johnlev mapout_ioapic(caddr_t addr, size_t len) 1508 5084 johnlev { 1509 5084 johnlev } 1510 5084 johnlev 1511 5084 johnlev uint32_t 1512 5084 johnlev ioapic_read(int apic_ix, uint32_t reg) 1513 5084 johnlev { 1514 5084 johnlev physdev_apic_t apic; 1515 5084 johnlev 1516 5084 johnlev apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1517 5084 johnlev apic.reg = reg; 1518 5084 johnlev if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic)) 1519 5084 johnlev panic("read ioapic %d reg %d failed", apic_ix, reg); 1520 5084 johnlev return (apic.value); 1521 5084 johnlev } 1522 5084 johnlev 1523 5084 johnlev void 1524 5084 johnlev ioapic_write(int apic_ix, uint32_t reg, uint32_t value) 1525 5084 johnlev { 1526 5084 johnlev physdev_apic_t apic; 1527 5084 johnlev 1528 5084 johnlev apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1529 5084 johnlev apic.reg = reg; 1530 5084 johnlev apic.value = value; 1531 5084 johnlev if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic)) 1532 5084 johnlev panic("write ioapic %d reg %d failed", apic_ix, reg); 1533 5084 johnlev } 1534 5084 johnlev 1535 5084 johnlev /* 1536 7282 mishra * This function was added as part of x2APIC support in pcplusmp. 1537 7282 mishra */ 1538 7282 mishra void 1539 7282 mishra ioapic_write_eoi(int apic_ix, uint32_t value) 1540 7282 mishra { 1541 7282 mishra physdev_apic_t apic; 1542 7282 mishra 1543 7282 mishra apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1544 7282 mishra apic.reg = APIC_IO_EOI; 1545 7282 mishra apic.value = value; 1546 7282 mishra if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic)) 1547 7282 mishra panic("write ioapic reg : APIC_IO_EOI %d failed", apic_ix); 1548 7282 mishra } 1549 7282 mishra 1550 7282 mishra /* 1551 7282 mishra * This function was added as part of x2APIC support in pcplusmp to resolve 1552 7282 mishra * undefined symbol in xpv_psm. 1553 7282 mishra */ 1554 7282 mishra void 1555 7282 mishra x2apic_update_psm() 1556 7282 mishra { 1557 7282 mishra } 1558 7282 mishra 1559 7282 mishra /* 1560 7282 mishra * This function was added as part of x2APIC support in pcplusmp to resolve 1561 7282 mishra * undefined symbol in xpv_psm. 1562 7282 mishra */ 1563 7282 mishra void 1564 7282 mishra apic_ret() 1565 7282 mishra { 1566 7282 mishra } 1567 7282 mishra 1568 7282 mishra /* 1569 5084 johnlev * Call rebind to do the actual programming. 1570 5084 johnlev */ 1571 5084 johnlev int 1572 5084 johnlev apic_setup_io_intr(void *p, int irq, boolean_t deferred) 1573 5084 johnlev { 1574 5084 johnlev apic_irq_t *irqptr; 1575 5084 johnlev struct ioapic_reprogram_data *drep = NULL; 1576 5084 johnlev int rv, cpu; 1577 5084 johnlev cpuset_t cpus; 1578 5084 johnlev 1579 5084 johnlev if (deferred) { 1580 5084 johnlev drep = (struct ioapic_reprogram_data *)p; 1581 5084 johnlev ASSERT(drep != NULL); 1582 5084 johnlev irqptr = drep->irqp; 1583 5084 johnlev } else { 1584 5084 johnlev irqptr = (apic_irq_t *)p; 1585 5084 johnlev } 1586 5084 johnlev ASSERT(irqptr != NULL); 1587 10175 Stuart /* 1588 10175 Stuart * Set cpu based on xen idea of online cpu's not apic tables. 1589 10175 Stuart * Note that xen ignores/sets to it's own preferred value the 1590 10175 Stuart * target cpu field when programming ioapic anyway. 1591 10175 Stuart */ 1592 10175 Stuart if (irqptr->airq_mps_intr_index == MSI_INDEX) 1593 10175 Stuart cpu = irqptr->airq_cpu; /* MSI cpus are already set */ 1594 10175 Stuart else { 1595 10175 Stuart cpu = xen_psm_bind_intr(irq); 1596 10175 Stuart irqptr->airq_cpu = cpu; 1597 10175 Stuart } 1598 10175 Stuart if (cpu == IRQ_UNBOUND) { 1599 10175 Stuart CPUSET_ZERO(cpus); 1600 10175 Stuart CPUSET_OR(cpus, xen_psm_cpus_online); 1601 10175 Stuart } else { 1602 10175 Stuart CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 1603 10175 Stuart } 1604 5084 johnlev rv = apic_rebind(irqptr, cpu, drep); 1605 5084 johnlev if (rv) { 1606 5084 johnlev /* CPU is not up or interrupt is disabled. Fall back to 0 */ 1607 5084 johnlev cpu = 0; 1608 10175 Stuart irqptr->airq_cpu = cpu; 1609 5084 johnlev rv = apic_rebind(irqptr, cpu, drep); 1610 5084 johnlev } 1611 5084 johnlev /* 1612 5084 johnlev * If rebind successful bind the irq to an event channel 1613 5084 johnlev */ 1614 5529 smaybe if (rv == 0) { 1615 5529 smaybe ec_setup_pirq(irq, irqptr->airq_ipl, &cpus); 1616 5529 smaybe CPUSET_FIND(cpus, cpu); 1617 5529 smaybe apic_irq_table[irq]->airq_temp_cpu = cpu & ~IRQ_USER_BOUND; 1618 5529 smaybe } 1619 5084 johnlev return (rv); 1620 5084 johnlev } 1621 5084 johnlev 1622 5084 johnlev /* 1623 5084 johnlev * Allocate a new vector for the given irq 1624 5084 johnlev */ 1625 5084 johnlev /* ARGSUSED */ 1626 5084 johnlev uchar_t 1627 5084 johnlev apic_modify_vector(uchar_t vector, int irq) 1628 5084 johnlev { 1629 5084 johnlev return (apic_allocate_vector(0, irq, 0)); 1630 5084 johnlev } 1631 5084 johnlev 1632 5084 johnlev /* 1633 5084 johnlev * The rest of the file is just generic psm module boilerplate 1634 5084 johnlev */ 1635 5084 johnlev 1636 5084 johnlev static struct psm_ops xen_psm_ops = { 1637 5084 johnlev xen_psm_probe, /* psm_probe */ 1638 5084 johnlev 1639 5084 johnlev xen_psm_softinit, /* psm_init */ 1640 5084 johnlev xen_psm_picinit, /* psm_picinit */ 1641 5084 johnlev xen_psm_intr_enter, /* psm_intr_enter */ 1642 5084 johnlev xen_psm_intr_exit, /* psm_intr_exit */ 1643 5084 johnlev xen_psm_setspl, /* psm_setspl */ 1644 5084 johnlev xen_psm_addspl, /* psm_addspl */ 1645 5084 johnlev xen_psm_delspl, /* psm_delspl */ 1646 5084 johnlev xen_psm_disable_intr, /* psm_disable_intr */ 1647 5084 johnlev xen_psm_enable_intr, /* psm_enable_intr */ 1648 5084 johnlev (int (*)(int))NULL, /* psm_softlvl_to_irq */ 1649 5084 johnlev (void (*)(int))NULL, /* psm_set_softintr */ 1650 5084 johnlev (void (*)(processorid_t))NULL, /* psm_set_idlecpu */ 1651 5084 johnlev (void (*)(processorid_t))NULL, /* psm_unset_idlecpu */ 1652 5084 johnlev 1653 5084 johnlev xen_psm_clkinit, /* psm_clkinit */ 1654 5084 johnlev xen_psm_get_clockirq, /* psm_get_clockirq */ 1655 5084 johnlev xen_psm_hrtimeinit, /* psm_hrtimeinit */ 1656 5084 johnlev xpv_gethrtime, /* psm_gethrtime */ 1657 5084 johnlev 1658 5084 johnlev xen_psm_get_next_processorid, /* psm_get_next_processorid */ 1659 5084 johnlev xen_psm_cpu_start, /* psm_cpu_start */ 1660 5084 johnlev xen_psm_post_cpu_start, /* psm_post_cpu_start */ 1661 5084 johnlev xen_psm_shutdown, /* psm_shutdown */ 1662 5084 johnlev xen_psm_get_ipivect, /* psm_get_ipivect */ 1663 5084 johnlev xen_psm_send_ipi, /* psm_send_ipi */ 1664 5084 johnlev 1665 5084 johnlev xen_psm_translate_irq, /* psm_translate_irq */ 1666 5084 johnlev 1667 5084 johnlev (void (*)(int, char *))NULL, /* psm_notify_error */ 1668 5084 johnlev (void (*)(int msg))NULL, /* psm_notify_func */ 1669 5084 johnlev xen_psm_timer_reprogram, /* psm_timer_reprogram */ 1670 5084 johnlev xen_psm_timer_enable, /* psm_timer_enable */ 1671 5084 johnlev xen_psm_timer_disable, /* psm_timer_disable */ 1672 5084 johnlev (void (*)(void *arg))NULL, /* psm_post_cyclic_setup */ 1673 5084 johnlev (void (*)(int, int))NULL, /* psm_preshutdown */ 1674 7767 John xen_intr_ops, /* Advanced DDI Interrupt framework */ 1675 7767 John (int (*)(psm_state_request_t *))NULL /* psm_state */ 1676 5084 johnlev }; 1677 5084 johnlev 1678 5084 johnlev static struct psm_info xen_psm_info = { 1679 5084 johnlev PSM_INFO_VER01_5, /* version */ 1680 6356 mrj PSM_OWN_EXCLUSIVE, /* ownership */ 1681 5084 johnlev &xen_psm_ops, /* operation */ 1682 5529 smaybe "xVM_psm", /* machine name */ 1683 7542 Richard "platform module" /* machine descriptions */ 1684 5084 johnlev }; 1685 5084 johnlev 1686 5084 johnlev static void *xen_psm_hdlp; 1687 5084 johnlev 1688 5084 johnlev int 1689 5084 johnlev _init(void) 1690 5084 johnlev { 1691 5084 johnlev return (psm_mod_init(&xen_psm_hdlp, &xen_psm_info)); 1692 5084 johnlev } 1693 5084 johnlev 1694 5084 johnlev int 1695 5084 johnlev _fini(void) 1696 5084 johnlev { 1697 5084 johnlev return (psm_mod_fini(&xen_psm_hdlp, &xen_psm_info)); 1698 5084 johnlev } 1699 5084 johnlev 1700 5084 johnlev int 1701 5084 johnlev _info(struct modinfo *modinfop) 1702 5084 johnlev { 1703 5084 johnlev return (psm_mod_info(&xen_psm_hdlp, &xen_psm_info, modinfop)); 1704 5084 johnlev } 1705