Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/hpet_acpi.h>
     27 #include <sys/hpet.h>
     28 #include <sys/bitmap.h>
     29 #include <sys/inttypes.h>
     30 #include <sys/time.h>
     31 #include <sys/sunddi.h>
     32 #include <sys/ksynch.h>
     33 #include <sys/apic.h>
     34 #include <sys/callb.h>
     35 #include <sys/clock.h>
     36 #include <sys/archsystm.h>
     37 #include <sys/cpupart.h>
     38 
/*
 * hpet_state_lock is used to synchronize disabling/enabling deep c-states
 * and to synchronize suspend/resume.
 */
static kmutex_t		hpet_state_lock;
/*
 * Module-wide state flags.  The positional initializer below sets
 * proxy_installed and cpr to B_FALSE, and cpu_deep_idle and uni_cstate
 * to B_TRUE.
 */
static struct hpet_state {
	boolean_t	proxy_installed;	/* CBE proxy interrupt setup */
	boolean_t	cpr;			/* currently in CPR */
	boolean_t	cpu_deep_idle;		/* user enable/disable */
	boolean_t	uni_cstate;		/* disable if only one cstate */
} hpet_state = { B_FALSE, B_FALSE, B_TRUE, B_TRUE};
     50 
/*
 * Spin-wait tunables.
 * hpet_spin_check is the number of failed lock attempts between deadline
 * checks in the spin loops of this file; hpet_spin_timeout is added to
 * gethrtime() to form the deadline for those loops.
 * hpet_idle_spin_timeout and hpet_isr_spin_timeout start with the same
 * default value (presumably used by the idle and ISR paths).
 */
uint64_t hpet_spin_check = HPET_SPIN_CHECK;
uint64_t hpet_spin_timeout = HPET_SPIN_TIMEOUT;
uint64_t hpet_idle_spin_timeout = HPET_SPIN_TIMEOUT;
uint64_t hpet_isr_spin_timeout = HPET_SPIN_TIMEOUT;

static kmutex_t		hpet_proxy_lock;	/* lock for lAPIC proxy data */
/*
 * hpet_proxy_users is a per-cpu array.
 */
static hpet_proxy_t	*hpet_proxy_users;	/* one per CPU */


/*
 * hpet_table is filled in by AcpiGetTable() in hpet_acpi_init();
 * hpet_info caches the decoded register contents read by this file.
 */
ACPI_TABLE_HPET		*hpet_table;		/* ACPI HPET table */
hpet_info_t		hpet_info;		/* Human readable Information */
     65 
     66 /*
     67  * Provide HPET access from unix.so.
     68  * Set up pointers to access symbols in pcplusmp.
     69  */
     70 static void
     71 hpet_establish_hooks(void)
     72 {
     73 	hpet.install_proxy = &hpet_install_proxy;
     74 	hpet.callback = &hpet_callback;
     75 	hpet.use_hpet_timer = &hpet_use_hpet_timer;
     76 	hpet.use_lapic_timer = &hpet_use_lapic_timer;
     77 }
     78 
     79 /*
     80  * Get the ACPI "HPET" table.
     81  * acpi_probe() calls this function from mp_startup before drivers are loaded.
     82  * acpi_probe() verified the system is using ACPI before calling this.
     83  *
     84  * There may be more than one ACPI HPET table (Itanium only?).
     85  * Intel's HPET spec defines each timer block to have up to 32 counters and
     86  * be 1024 bytes long.  There can be more than one timer block of 32 counters.
     87  * Each timer block would have an additional ACPI HPET table.
     88  * Typical x86 systems today only have 1 HPET with 3 counters.
     89  * On x86 we only consume HPET table "1" for now.
     90  */
     91 int
     92 hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
     93 {
     94 	extern hrtime_t tsc_read(void);
     95 	extern int	idle_cpu_no_deep_c;
     96 	extern int	cpuid_deep_cstates_supported(void);
     97 	void		*la;
     98 	uint64_t	ret;
     99 	uint_t		num_timers;
    100 	uint_t		ti;
    101 
    102 	(void) memset(&hpet_info, 0, sizeof (hpet_info));
    103 	hpet.supported = HPET_NO_SUPPORT;
    104 
    105 	if (idle_cpu_no_deep_c)
    106 		return (DDI_FAILURE);
    107 
    108 	if (!cpuid_deep_cstates_supported())
    109 		return (DDI_FAILURE);
    110 
    111 	hpet_establish_hooks();
    112 
    113 	/*
    114 	 * Get HPET ACPI table 1.
    115 	 */
    116 	if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_HPET, HPET_TABLE_1,
    117 	    (ACPI_TABLE_HEADER **)&hpet_table))) {
    118 		cmn_err(CE_NOTE, "!hpet_acpi: unable to get ACPI HPET table");
    119 		return (DDI_FAILURE);
    120 	}
    121 
    122 	if (hpet_validate_table(hpet_table) != AE_OK) {
    123 		cmn_err(CE_NOTE, "!hpet_acpi: invalid HPET table");
    124 		return (DDI_FAILURE);
    125 	}
    126 
    127 	la = hpet_memory_map(hpet_table);
    128 	if (la == NULL) {
    129 		cmn_err(CE_NOTE, "!hpet_acpi: memory map HPET failed");
    130 		return (DDI_FAILURE);
    131 	}
    132 	hpet_info.logical_address = la;
    133 
    134 	ret = hpet_read_gen_cap(&hpet_info);
    135 	hpet_info.gen_cap.counter_clk_period = HPET_GCAP_CNTR_CLK_PERIOD(ret);
    136 	hpet_info.gen_cap.vendor_id = HPET_GCAP_VENDOR_ID(ret);
    137 	hpet_info.gen_cap.leg_route_cap = HPET_GCAP_LEG_ROUTE_CAP(ret);
    138 	hpet_info.gen_cap.count_size_cap = HPET_GCAP_CNT_SIZE_CAP(ret);
    139 	/*
    140 	 * Hardware contains the last timer's number.
    141 	 * Add 1 to get the number of timers.
    142 	 */
    143 	hpet_info.gen_cap.num_tim_cap = HPET_GCAP_NUM_TIM_CAP(ret) + 1;
    144 	hpet_info.gen_cap.rev_id = HPET_GCAP_REV_ID(ret);
    145 
    146 	if (hpet_info.gen_cap.counter_clk_period > HPET_MAX_CLK_PERIOD) {
    147 		cmn_err(CE_NOTE, "!hpet_acpi: COUNTER_CLK_PERIOD 0x%lx > 0x%lx",
    148 		    (long)hpet_info.gen_cap.counter_clk_period,
    149 		    (long)HPET_MAX_CLK_PERIOD);
    150 		return (DDI_FAILURE);
    151 	}
    152 
    153 	num_timers = (uint_t)hpet_info.gen_cap.num_tim_cap;
    154 	if ((num_timers < 3) || (num_timers > 32)) {
    155 		cmn_err(CE_NOTE, "!hpet_acpi: invalid number of HPET timers "
    156 		    "%lx", (long)num_timers);
    157 		return (DDI_FAILURE);
    158 	}
    159 	hpet_info.timer_n_config = (hpet_TN_conf_cap_t *)kmem_zalloc(
    160 	    num_timers * sizeof (uint64_t), KM_SLEEP);
    161 
    162 	ret = hpet_read_gen_config(&hpet_info);
    163 	hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
    164 	hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
    165 
    166 	/*
    167 	 * Solaris does not use the HPET Legacy Replacement Route capabilities.
    168 	 * This feature has been off by default on test systems.
    169 	 * The HPET spec does not specify if Legacy Replacement Route is
    170 	 * on or off by default, so we explicitely set it off here.
    171 	 * It should not matter which mode the HPET is in since we use
    172 	 * the first available non-legacy replacement timer: timer 2.
    173 	 */
    174 	(void) hpet_set_leg_rt_cnf(&hpet_info, 0);
    175 
    176 	ret = hpet_read_gen_config(&hpet_info);
    177 	hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
    178 	hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
    179 
    180 	hpet_info.gen_intrpt_stat = hpet_read_gen_intrpt_stat(&hpet_info);
    181 	hpet_info.main_counter_value = hpet_read_main_counter_value(&hpet_info);
    182 
    183 	for (ti = 0; ti < num_timers; ++ti) {
    184 		ret = hpet_read_timer_N_config(&hpet_info, ti);
    185 		/*
    186 		 * Make sure no timers are enabled (think fast reboot or
    187 		 * virtual hardware).
    188 		 */
    189 		if (ret & HPET_TIMER_N_INT_ENB_CNF_BIT) {
    190 			hpet_disable_timer(&hpet_info, ti);
    191 			ret &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
    192 		}
    193 
    194 		hpet_info.timer_n_config[ti] = hpet_convert_timer_N_config(ret);
    195 	}
    196 
    197 	/*
    198 	 * Be aware the Main Counter may need to be initialized in the future
    199 	 * if it is used for more than just Deep C-State support.
    200 	 * The HPET's Main Counter does not need to be initialize to a specific
    201 	 * value before starting it for use to wake up CPUs from Deep C-States.
    202 	 */
    203 	if (hpet_start_main_counter(&hpet_info) != AE_OK) {
    204 		cmn_err(CE_NOTE, "!hpet_acpi: hpet_start_main_counter failed");
    205 		return (DDI_FAILURE);
    206 	}
    207 
    208 	hpet_info.period = hpet_info.gen_cap.counter_clk_period;
    209 	/*
    210 	 * Read main counter twice to record HPET latency for debugging.
    211 	 */
    212 	hpet_info.tsc[0] = tsc_read();
    213 	hpet_info.hpet_main_counter_reads[0] =
    214 	    hpet_read_main_counter_value(&hpet_info);
    215 	hpet_info.tsc[1] = tsc_read();
    216 	hpet_info.hpet_main_counter_reads[1] =
    217 	    hpet_read_main_counter_value(&hpet_info);
    218 	hpet_info.tsc[2] = tsc_read();
    219 
    220 	ret = hpet_read_gen_config(&hpet_info);
    221 	hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
    222 	hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
    223 
    224 	/*
    225 	 * HPET main counter reads are supported now.
    226 	 */
    227 	hpet.supported = HPET_TIMER_SUPPORT;
    228 
    229 	return (hpet_init_proxy(hpet_vect, hpet_flags));
    230 }
    231 
    232 void
    233 hpet_acpi_fini(void)
    234 {
    235 	if (hpet.supported == HPET_NO_SUPPORT)
    236 		return;
    237 	if (hpet.supported >= HPET_TIMER_SUPPORT)
    238 		(void) hpet_stop_main_counter(&hpet_info);
    239 	if (hpet.supported > HPET_TIMER_SUPPORT)
    240 		hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
    241 }
    242 
    243 /*
    244  * Do initial setup to use a HPET timer as a proxy for Deep C-state stalled
    245  * LAPIC Timers.  Get a free HPET timer that supports I/O APIC routed interrupt.
    246  * Setup data to handle the timer's ISR, and add the timer's interrupt.
    247  *
    248  * The ddi cannot be use to allocate the HPET timer's interrupt.
    249  * ioapic_init_intr() in mp_platform_common() later sets up the I/O APIC
    250  * to handle the HPET timer's interrupt.
    251  *
    252  * Note: FSB (MSI) interrupts are not currently supported by Intel HPETs as of
    253  * ICH9.  The HPET spec allows for MSI.  In the future MSI may be prefered.
    254  */
    255 static int
    256 hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags)
    257 {
    258 	if (hpet_get_IOAPIC_intr_capable_timer(&hpet_info) == -1) {
    259 		cmn_err(CE_WARN, "!hpet_acpi: get ioapic intr failed.");
    260 		return (DDI_FAILURE);
    261 	}
    262 
    263 	hpet_init_proxy_data();
    264 
    265 	if (hpet_install_interrupt_handler(&hpet_isr,
    266 	    hpet_info.cstate_timer.intr) != AE_OK) {
    267 		cmn_err(CE_WARN, "!hpet_acpi: install interrupt failed.");
    268 		return (DDI_FAILURE);
    269 	}
    270 	*hpet_vect = hpet_info.cstate_timer.intr;
    271 	hpet_flags->intr_el = INTR_EL_LEVEL;
    272 	hpet_flags->intr_po = INTR_PO_ACTIVE_HIGH;
    273 	hpet_flags->bustype = BUS_PCI;		/*  we *do* conform to PCI */
    274 
    275 	/*
    276 	 * Avoid a possibly stuck interrupt by programing the HPET's timer here
    277 	 * before the I/O APIC is programmed to handle this interrupt.
    278 	 */
    279 	hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
    280 	    hpet_info.cstate_timer.intr);
    281 
    282 	/*
    283 	 * All HPET functionality is supported.
    284 	 */
    285 	hpet.supported = HPET_FULL_SUPPORT;
    286 	return (DDI_SUCCESS);
    287 }
    288 
    289 /*
    290  * Called by kernel if it can support Deep C-States.
    291  */
    292 static boolean_t
    293 hpet_install_proxy(void)
    294 {
    295 	if (hpet_state.proxy_installed == B_TRUE)
    296 		return (B_TRUE);
    297 
    298 	if (hpet.supported != HPET_FULL_SUPPORT)
    299 		return (B_FALSE);
    300 
    301 	hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
    302 	hpet_state.proxy_installed = B_TRUE;
    303 
    304 	return (B_TRUE);
    305 }
    306 
    307 /*
    308  * Remove the interrupt that was added with add_avintr() in
    309  * hpet_install_interrupt_handler().
    310  */
    311 static void
    312 hpet_uninstall_interrupt_handler(void)
    313 {
    314 	rem_avintr(NULL, CBE_HIGH_PIL, (avfunc)&hpet_isr,
    315 	    hpet_info.cstate_timer.intr);
    316 }
    317 
    318 static int
    319 hpet_validate_table(ACPI_TABLE_HPET *hpet_table)
    320 {
    321 	ACPI_TABLE_HEADER	*table_header = (ACPI_TABLE_HEADER *)hpet_table;
    322 
    323 	if (table_header->Length != sizeof (ACPI_TABLE_HPET)) {
    324 		cmn_err(CE_WARN, "!hpet_validate_table: Length %lx != sizeof ("
    325 		    "ACPI_TABLE_HPET) %lx.",
    326 		    (unsigned long)((ACPI_TABLE_HEADER *)hpet_table)->Length,
    327 		    (unsigned long)sizeof (ACPI_TABLE_HPET));
    328 		return (AE_ERROR);
    329 	}
    330 
    331 	if (!ACPI_COMPARE_NAME(table_header->Signature, ACPI_SIG_HPET)) {
    332 		cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET table "
    333 		    "signature");
    334 		return (AE_ERROR);
    335 	}
    336 
    337 	if (!hpet_checksum_table((unsigned char *)hpet_table,
    338 	    (unsigned int)table_header->Length)) {
    339 		cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET checksum");
    340 		return (AE_ERROR);
    341 	}
    342 
    343 	/*
    344 	 * Sequence should be table number - 1.  We are using table 1.
    345 	 */
    346 	if (hpet_table->Sequence != HPET_TABLE_1 - 1) {
    347 		cmn_err(CE_WARN, "!hpet_validate_table: Invalid Sequence %lx",
    348 		    (long)hpet_table->Sequence);
    349 		return (AE_ERROR);
    350 	}
    351 
    352 	return (AE_OK);
    353 }
    354 
    355 static boolean_t
    356 hpet_checksum_table(unsigned char *table, unsigned int length)
    357 {
    358 	unsigned char	checksum = 0;
    359 	int		i;
    360 
    361 	for (i = 0; i < length; ++i, ++table)
    362 		checksum += *table;
    363 
    364 	return (checksum == 0);
    365 }
    366 
    367 static void *
    368 hpet_memory_map(ACPI_TABLE_HPET *hpet_table)
    369 {
    370 	return (AcpiOsMapMemory(hpet_table->Address.Address, HPET_SIZE));
    371 }
    372 
    373 static int
    374 hpet_start_main_counter(hpet_info_t *hip)
    375 {
    376 	uint64_t	*gcr_ptr;
    377 	uint64_t	gcr;
    378 
    379 	gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
    380 	gcr = *gcr_ptr;
    381 
    382 	gcr |= HPET_GCFR_ENABLE_CNF;
    383 	*gcr_ptr = gcr;
    384 	gcr = *gcr_ptr;
    385 
    386 	return (gcr & HPET_GCFR_ENABLE_CNF ? AE_OK : ~AE_OK);
    387 }
    388 
    389 static int
    390 hpet_stop_main_counter(hpet_info_t *hip)
    391 {
    392 	uint64_t	*gcr_ptr;
    393 	uint64_t	gcr;
    394 
    395 	gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
    396 	gcr = *gcr_ptr;
    397 
    398 	gcr &= ~HPET_GCFR_ENABLE_CNF;
    399 	*gcr_ptr = gcr;
    400 	gcr = *gcr_ptr;
    401 
    402 	return (gcr & HPET_GCFR_ENABLE_CNF ? ~AE_OK : AE_OK);
    403 }
    404 
    405 /*
    406  * Set the Legacy Replacement Route bit.
    407  * This should be called before setting up timers.
    408  * The HPET specification is silent regarding setting this after timers are
    409  * programmed.
    410  */
    411 static uint64_t
    412 hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value)
    413 {
    414 	uint64_t gen_conf = hpet_read_gen_config(hip);
    415 
    416 	switch (new_value) {
    417 	case 0:
    418 		gen_conf &= ~HPET_GCFR_LEG_RT_CNF;
    419 		break;
    420 
    421 	case HPET_GCFR_LEG_RT_CNF:
    422 		gen_conf |= HPET_GCFR_LEG_RT_CNF;
    423 		break;
    424 
    425 	default:
    426 		ASSERT(new_value == 0 || new_value == HPET_GCFR_LEG_RT_CNF);
    427 		break;
    428 	}
    429 	hpet_write_gen_config(hip, gen_conf);
    430 	return (gen_conf);
    431 }
    432 
    433 static uint64_t
    434 hpet_read_gen_cap(hpet_info_t *hip)
    435 {
    436 	return (*(uint64_t *)HPET_GEN_CAP_ADDRESS(hip->logical_address));
    437 }
    438 
    439 static uint64_t
    440 hpet_read_gen_config(hpet_info_t *hip)
    441 {
    442 	return (*(uint64_t *)
    443 	    HPET_GEN_CONFIG_ADDRESS(hip->logical_address));
    444 }
    445 
    446 static uint64_t
    447 hpet_read_gen_intrpt_stat(hpet_info_t *hip)
    448 {
    449 	hip->gen_intrpt_stat = *(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(
    450 	    hip->logical_address);
    451 	return (hip->gen_intrpt_stat);
    452 }
    453 
    454 static uint64_t
    455 hpet_read_timer_N_config(hpet_info_t *hip, uint_t n)
    456 {
    457 	uint64_t conf = *(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
    458 	    hip->logical_address, n);
    459 	hip->timer_n_config[n] = hpet_convert_timer_N_config(conf);
    460 	return (conf);
    461 }
    462 
    463 static hpet_TN_conf_cap_t
    464 hpet_convert_timer_N_config(uint64_t conf)
    465 {
    466 	hpet_TN_conf_cap_t cc = { 0 };
    467 
    468 	cc.int_route_cap = HPET_TIMER_N_INT_ROUTE_CAP(conf);
    469 	cc.fsb_int_del_cap = HPET_TIMER_N_FSB_INT_DEL_CAP(conf);
    470 	cc.fsb_int_en_cnf = HPET_TIMER_N_FSB_EN_CNF(conf);
    471 	cc.int_route_cnf = HPET_TIMER_N_INT_ROUTE_CNF(conf);
    472 	cc.mode32_cnf = HPET_TIMER_N_MODE32_CNF(conf);
    473 	cc.val_set_cnf = HPET_TIMER_N_VAL_SET_CNF(conf);
    474 	cc.size_cap = HPET_TIMER_N_SIZE_CAP(conf);
    475 	cc.per_int_cap = HPET_TIMER_N_PER_INT_CAP(conf);
    476 	cc.type_cnf = HPET_TIMER_N_TYPE_CNF(conf);
    477 	cc.int_enb_cnf = HPET_TIMER_N_INT_ENB_CNF(conf);
    478 	cc.int_type_cnf = HPET_TIMER_N_INT_TYPE_CNF(conf);
    479 
    480 	return (cc);
    481 }
    482 
    483 static uint64_t
    484 hpet_read_timer_N_comp(hpet_info_t *hip, uint_t n)
    485 {
    486 	if (hip->timer_n_config[n].size_cap == 1)
    487 		return (*(uint64_t *)
    488 		    HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n));
    489 	else
    490 		return (*(uint32_t *)
    491 		    HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n));
    492 }
    493 
    494 static uint64_t
    495 hpet_read_main_counter_value(hpet_info_t *hip)
    496 {
    497 	uint64_t	value;
    498 	uint32_t	*counter;
    499 	uint32_t	high1, high2, low;
    500 
    501 	counter = (uint32_t *)HPET_MAIN_COUNTER_ADDRESS(hip->logical_address);
    502 
    503 	/*
    504 	 * 32-bit main counters
    505 	 */
    506 	if (hip->gen_cap.count_size_cap == 0) {
    507 		value = (uint64_t)*counter;
    508 		hip->main_counter_value = value;
    509 		return (value);
    510 	}
    511 
    512 	/*
    513 	 * HPET spec claims a 64-bit read can be split into two 32-bit reads
    514 	 * by the hardware connection to the HPET.
    515 	 */
    516 	high2 = counter[1];
    517 	do {
    518 		high1 = high2;
    519 		low = counter[0];
    520 		high2 = counter[1];
    521 	} while (high2 != high1);
    522 
    523 	value = ((uint64_t)high1 << 32) | low;
    524 	hip->main_counter_value = value;
    525 	return (value);
    526 }
    527 
    528 static void
    529 hpet_write_gen_cap(hpet_info_t *hip, uint64_t l)
    530 {
    531 	*(uint64_t *)HPET_GEN_CAP_ADDRESS(hip->logical_address) = l;
    532 }
    533 
    534 static void
    535 hpet_write_gen_config(hpet_info_t *hip, uint64_t l)
    536 {
    537 	*(uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address) = l;
    538 }
    539 
    540 static void
    541 hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l)
    542 {
    543 	*(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(hip->logical_address) = l;
    544 }
    545 
    546 static void
    547 hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l)
    548 {
    549 	if (hip->timer_n_config[n].size_cap == 1)
    550 		*(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
    551 		    hip->logical_address, n) = l;
    552 	else
    553 		*(uint32_t *)HPET_TIMER_N_CONF_ADDRESS(
    554 		    hip->logical_address, n) = (uint32_t)(0xFFFFFFFF & l);
    555 }
    556 
    557 static void
    558 hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l)
    559 {
    560 	*(uint64_t *)HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n) = l;
    561 }
    562 
    563 static void
    564 hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n)
    565 {
    566 	uint64_t l;
    567 
    568 	l = hpet_read_timer_N_config(hip, timer_n);
    569 	l &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
    570 	hpet_write_timer_N_config(hip, timer_n, l);
    571 }
    572 
    573 static void
    574 hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n)
    575 {
    576 	uint64_t l;
    577 
    578 	l = hpet_read_timer_N_config(hip, timer_n);
    579 	l |= HPET_TIMER_N_INT_ENB_CNF_BIT;
    580 	hpet_write_timer_N_config(hip, timer_n, l);
    581 }
    582 
    583 static void
    584 hpet_write_main_counter_value(hpet_info_t *hip, uint64_t l)
    585 {
    586 	uint32_t	*address;
    587 
    588 	/*
    589 	 * HPET spec 1.0a states main counter register should be halted before
    590 	 * it is written to.
    591 	 */
    592 	ASSERT(!(hpet_read_gen_config(hip) & HPET_GCFR_ENABLE_CNF));
    593 
    594 	if (hip->gen_cap.count_size_cap == 1) {
    595 		*(uint64_t *)HPET_MAIN_COUNTER_ADDRESS(hip->logical_address)
    596 		    = l;
    597 	} else {
    598 		address = (uint32_t *)HPET_MAIN_COUNTER_ADDRESS(
    599 		    hip->logical_address);
    600 
    601 		address[0] = (uint32_t)(l & 0xFFFFFFFF);
    602 	}
    603 }
    604 
    605 /*
    606  * Add the interrupt handler for I/O APIC interrupt number (interrupt line).
    607  *
    608  * The I/O APIC line (vector) is programmed in ioapic_init_intr() called
    609  * from apic_picinit() psm_ops apic_ops entry point after we return from
    610  * apic_init() psm_ops entry point.
    611  */
    612 static uint32_t
    613 hpet_install_interrupt_handler(uint_t (*func)(char *), int vector)
    614 {
    615 	uint32_t retval;
    616 
    617 	retval = add_avintr(NULL, CBE_HIGH_PIL, (avfunc)func, "HPET Timer",
    618 	    vector, NULL, NULL, NULL, NULL);
    619 	if (retval == 0) {
    620 		cmn_err(CE_WARN, "!hpet_acpi: add_avintr() failed");
    621 		return (AE_BAD_PARAMETER);
    622 	}
    623 	return (AE_OK);
    624 }
    625 
    626 /*
    627  * The HPET timers specify which I/O APIC interrupts they can be routed to.
    628  * Find the first available non-legacy-replacement timer and its I/O APIC irq.
    629  * Supported I/O APIC IRQs are specified in the int_route_cap bitmap in each
    630  * timer's timer_n_config register.
    631  */
    632 static int
    633 hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip)
    634 {
    635 	int	timer;
    636 	int	intr;
    637 
    638 	for (timer = HPET_FIRST_NON_LEGACY_TIMER;
    639 	    timer < hip->gen_cap.num_tim_cap; ++timer) {
    640 
    641 		if (!hpet_timer_available(hip->allocated_timers, timer))
    642 			continue;
    643 
    644 		intr = lowbit(hip->timer_n_config[timer].int_route_cap) - 1;
    645 		if (intr >= 0) {
    646 			hpet_timer_alloc(&hip->allocated_timers, timer);
    647 			hip->cstate_timer.timer = timer;
    648 			hip->cstate_timer.intr = intr;
    649 			return (timer);
    650 		}
    651 	}
    652 
    653 	return (-1);
    654 }
    655 
    656 /*
    657  * Mark this timer as used.
    658  */
    659 static void
    660 hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n)
    661 {
    662 	*allocated_timers |= 1 << n;
    663 }
    664 
    665 /*
    666  * Check if this timer is available.
    667  * No mutual exclusion because only one thread uses this.
    668  */
    669 static int
    670 hpet_timer_available(uint32_t allocated_timers, uint32_t n)
    671 {
    672 	return ((allocated_timers & (1 << n)) == 0);
    673 }
    674 
    675 /*
    676  * Setup timer N to route its interrupt to I/O APIC.
    677  */
    678 static void
    679 hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n, uint32_t interrupt)
    680 {
    681 	uint64_t conf;
    682 
    683 	conf = hpet_read_timer_N_config(hip, timer_n);
    684 
    685 	/*
    686 	 * Caller is required to verify this interrupt route is supported.
    687 	 */
    688 	ASSERT(HPET_TIMER_N_INT_ROUTE_CAP(conf) & (1 << interrupt));
    689 
    690 	conf &= ~HPET_TIMER_N_FSB_EN_CNF_BIT;	/* use IOAPIC */
    691 	conf |= HPET_TIMER_N_INT_ROUTE_SHIFT(interrupt);
    692 	conf &= ~HPET_TIMER_N_TYPE_CNF_BIT;	/* non periodic */
    693 	conf &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;	/* disabled */
    694 	conf |= HPET_TIMER_N_INT_TYPE_CNF_BIT;	/* Level Triggered */
    695 
    696 	hpet_write_timer_N_config(hip, timer_n, conf);
    697 }
    698 
    699 /*
    700  * The HPET's Main Counter is not stopped before programming an HPET timer.
    701  * This will allow the HPET to be used as a time source.
    702  * The programmed timer interrupt may occur before this function returns.
    703  * Callers must block interrupts before calling this function if they must
    704  * guarantee the interrupt is handled after this function returns.
    705  *
    706  * Return 0 if main counter is less than timer after enabling timer.
    707  * The interrupt was programmed, but it may fire before this returns.
    708  * Return !0 if main counter is greater than timer after enabling timer.
    709  * In other words: the timer will not fire, and we do not know if it did fire.
    710  *
    711  * delta is in HPET ticks.
    712  *
    713  * Writing a 64-bit value to a 32-bit register will "wrap around".
    714  * A 32-bit HPET timer will wrap around in a little over 5 minutes.
    715  */
    716 int
    717 hpet_timer_program(hpet_info_t *hip, uint32_t timer, uint64_t delta)
    718 {
    719 	uint64_t time, program;
    720 
    721 	program = hpet_read_main_counter_value(hip);
    722 	program += delta;
    723 	hpet_write_timer_N_comp(hip, timer, program);
    724 
    725 	time = hpet_read_main_counter_value(hip);
    726 	if (time < program)
    727 		return (AE_OK);
    728 
    729 	return (AE_TIME);
    730 }
    731 
    732 /*
    733  * CPR and power policy-change callback entry point.
    734  */
    735 boolean_t
    736 hpet_callback(int code)
    737 {
    738 	switch (code) {
    739 	case PM_DEFAULT_CPU_DEEP_IDLE:
    740 		/*FALLTHROUGH*/
    741 	case PM_ENABLE_CPU_DEEP_IDLE:
    742 		/*FALLTHROUGH*/
    743 	case PM_DISABLE_CPU_DEEP_IDLE:
    744 		return (hpet_deep_idle_config(code));
    745 
    746 	case CB_CODE_CPR_RESUME:
    747 		/*FALLTHROUGH*/
    748 	case CB_CODE_CPR_CHKPT:
    749 		return (hpet_cpr(code));
    750 
    751 	case CST_EVENT_MULTIPLE_CSTATES:
    752 		hpet_cst_callback(CST_EVENT_MULTIPLE_CSTATES);
    753 		return (B_TRUE);
    754 
    755 	case CST_EVENT_ONE_CSTATE:
    756 		hpet_cst_callback(CST_EVENT_ONE_CSTATE);
    757 		return (B_TRUE);
    758 
    759 	default:
    760 		cmn_err(CE_NOTE, "!hpet_callback: invalid code %d\n", code);
    761 		return (B_FALSE);
    762 	}
    763 }
    764 
/*
 * According to the HPET spec 1.0a: the Operating System must save and restore
 * HPET event timer hardware context through ACPI sleep state transitions.
 * Timer registers (including the main counter) may not be preserved through
 * ACPI S3, S4, or S5 sleep states.  This code does not support S1 nor S2.
 *
 * Current HPET state is already in hpet.supported and
 * hpet_state.proxy_installed.  hpet_info contains the proxy interrupt HPET
 * Timer state.
 *
 * Future projects beware: the HPET Main Counter is undefined after ACPI S3 or
 * S4, and it is not saved/restored here.  Future projects cannot expect the
 * Main Counter to be monotonically (or accurately) increasing across CPR.
 *
 * Note: the CPR Checkpoint path later calls pause_cpus() which ensures all
 * CPUs are awake and in a spin loop before the system suspends.  The HPET is
 * not needed for Deep C-state wakeup when CPUs are in cpu_pause().
 * It is safe to leave the HPET running as the system suspends; we just
 * disable the timer from generating interrupts here.
 *
 * code is CB_CODE_CPR_CHKPT or CB_CODE_CPR_RESUME.
 * Returns B_FALSE for an unknown code or when the checkpoint path times out
 * waiting for hpet_proxy_lock; otherwise returns B_TRUE.
 * NOTE(review): a failed hpet_resume() is only logged; B_TRUE is still
 * returned and hpet_state.cpr stays B_TRUE.
 */
static boolean_t
hpet_cpr(int code)
{
	ulong_t		intr, dead_count = 0;
	/* deadline is computed before hpet_state_lock is acquired */
	hrtime_t	dead = gethrtime() + hpet_spin_timeout;
	boolean_t	ret = B_TRUE;

	mutex_enter(&hpet_state_lock);
	switch (code) {
	case CB_CODE_CPR_CHKPT:
		/* nothing to quiesce if the proxy was never installed */
		if (hpet_state.proxy_installed == B_FALSE)
			break;

		hpet_state.cpr = B_TRUE;

		/*
		 * Take hpet_proxy_lock with the interrupt state saved by
		 * intr_clear(); between failed attempts the saved state is
		 * restored.
		 */
		intr = intr_clear();
		while (!mutex_tryenter(&hpet_proxy_lock)) {
			/*
			 * spin
			 */
			intr_restore(intr);
			/* only look at the clock every hpet_spin_check tries */
			if (dead_count++ > hpet_spin_check) {
				dead_count = 0;
				if (gethrtime() > dead) {
					/* give up: undo the cpr flag and bail */
					hpet_state.cpr = B_FALSE;
					mutex_exit(&hpet_state_lock);
					cmn_err(CE_NOTE, "!hpet_cpr: deadman");
					return (B_FALSE);
				}
			}
			intr = intr_clear();
		}
		hpet_expire_all();
		mutex_exit(&hpet_proxy_lock);
		intr_restore(intr);

		/* stop the proxy timer from generating interrupts */
		hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
		break;

	case CB_CODE_CPR_RESUME:
		if (hpet_resume() == B_TRUE)
			hpet_state.cpr = B_FALSE;
		else
			cmn_err(CE_NOTE, "!hpet_resume failed.");
		break;

	default:
		cmn_err(CE_NOTE, "!hpet_cpr: invalid code %d\n", code);
		ret = B_FALSE;
		break;
	}
	mutex_exit(&hpet_state_lock);
	return (ret);
}
    839 
    840 /*
    841  * Assume the HPET stopped in Suspend state and timer state was lost.
    842  */
    843 static boolean_t
    844 hpet_resume(void)
    845 {
    846 	if (hpet.supported != HPET_TIMER_SUPPORT)
    847 		return (B_TRUE);
    848 
    849 	/*
    850 	 * The HPET spec does not specify if Legacy Replacement Route is
    851 	 * on or off by default, so we set it off here.
    852 	 */
    853 	(void) hpet_set_leg_rt_cnf(&hpet_info, 0);
    854 
    855 	if (hpet_start_main_counter(&hpet_info) != AE_OK) {
    856 		cmn_err(CE_NOTE, "!hpet_resume: start main counter failed");
    857 		hpet.supported = HPET_NO_SUPPORT;
    858 		if (hpet_state.proxy_installed == B_TRUE) {
    859 			hpet_state.proxy_installed = B_FALSE;
    860 			hpet_uninstall_interrupt_handler();
    861 		}
    862 		return (B_FALSE);
    863 	}
    864 
    865 	if (hpet_state.proxy_installed == B_FALSE)
    866 		return (B_TRUE);
    867 
    868 	hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
    869 	    hpet_info.cstate_timer.intr);
    870 	if (hpet_state.cpu_deep_idle == B_TRUE)
    871 		hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
    872 
    873 	return (B_TRUE);
    874 }
    875 
/*
 * Callback to enable/disable Deep C-States based on power.conf setting.
 *
 * code is PM_DEFAULT_CPU_DEEP_IDLE or PM_ENABLE_CPU_DEEP_IDLE (both enable)
 * or PM_DISABLE_CPU_DEEP_IDLE.
 * Returns B_FALSE when enabling is requested but the proxy is not installed,
 * when the disable path times out waiting for hpet_proxy_lock, or for an
 * unknown code; otherwise returns B_TRUE.
 */
static boolean_t
hpet_deep_idle_config(int code)
{
	ulong_t		intr, dead_count = 0;
	/* deadline is computed before hpet_state_lock is acquired */
	hrtime_t	dead = gethrtime() + hpet_spin_timeout;
	boolean_t	ret = B_TRUE;

	mutex_enter(&hpet_state_lock);
	switch (code) {
	case PM_DEFAULT_CPU_DEEP_IDLE:
		/*FALLTHROUGH*/
	case PM_ENABLE_CPU_DEEP_IDLE:

		/* already enabled: nothing to do */
		if (hpet_state.cpu_deep_idle == B_TRUE)
			break;

		if (hpet_state.proxy_installed == B_FALSE) {
			ret = B_FALSE;  /* Deep C-States not supported */
			break;
		}

		hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
		hpet_state.cpu_deep_idle = B_TRUE;
		break;

	case PM_DISABLE_CPU_DEEP_IDLE:

		/* already disabled, or no proxy to tear down */
		if ((hpet_state.cpu_deep_idle == B_FALSE) ||
		    (hpet_state.proxy_installed == B_FALSE))
			break;

		/*
		 * The order of these operations is important to avoid
		 * lost wakeups: Set a flag to refuse all future LAPIC Timer
		 * proxy requests, then wake up all CPUs from deep C-state,
		 * and finally disable the HPET interrupt-generating timer.
		 */
		hpet_state.cpu_deep_idle = B_FALSE;

		/*
		 * Take hpet_proxy_lock with the interrupt state saved by
		 * intr_clear(); between failed attempts the saved state is
		 * restored.
		 */
		intr = intr_clear();
		while (!mutex_tryenter(&hpet_proxy_lock)) {
			/*
			 * spin
			 */
			intr_restore(intr);
			/* only look at the clock every hpet_spin_check tries */
			if (dead_count++ > hpet_spin_check) {
				dead_count = 0;
				if (gethrtime() > dead) {
					/* give up: restore the flag and bail */
					hpet_state.cpu_deep_idle = B_TRUE;
					mutex_exit(&hpet_state_lock);
					cmn_err(CE_NOTE,
					    "!hpet_deep_idle_config: deadman");
					return (B_FALSE);
				}
			}
			intr = intr_clear();
		}
		hpet_expire_all();
		mutex_exit(&hpet_proxy_lock);
		intr_restore(intr);

		hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
		break;

	default:
		cmn_err(CE_NOTE, "!hpet_deep_idle_config: invalid code %d\n",
		    code);
		ret = B_FALSE;
		break;
	}
	mutex_exit(&hpet_state_lock);

	return (ret);
}
    953 
    954 /*
    955  * Callback for _CST c-state change notifications.
    956  */
    957 static void
    958 hpet_cst_callback(uint32_t code)
    959 {
    960 	ulong_t		intr, dead_count = 0;
    961 	hrtime_t	dead = gethrtime() + hpet_spin_timeout;
    962 
    963 	switch (code) {
    964 	case CST_EVENT_ONE_CSTATE:
    965 		hpet_state.uni_cstate = B_TRUE;
    966 		intr = intr_clear();
    967 		while (!mutex_tryenter(&hpet_proxy_lock)) {
    968 			/*
    969 			 * spin
    970 			 */
    971 			intr_restore(intr);
    972 			if (dead_count++ > hpet_spin_check) {
    973 				dead_count = 0;
    974 				if (gethrtime() > dead) {
    975 					hpet_expire_all();
    976 					cmn_err(CE_NOTE,
    977 					    "!hpet_cst_callback: deadman");
    978 					return;
    979 				}
    980 			}
    981 			intr = intr_clear();
    982 		}
    983 		hpet_expire_all();
    984 		mutex_exit(&hpet_proxy_lock);
    985 		intr_restore(intr);
    986 		break;
    987 
    988 	case CST_EVENT_MULTIPLE_CSTATES:
    989 		hpet_state.uni_cstate = B_FALSE;
    990 		break;
    991 
    992 	default:
    993 		cmn_err(CE_NOTE, "!hpet_cst_callback: invalid code %d\n", code);
    994 		break;
    995 	}
    996 }
    997 
    998 /*
    999  * Interrupt Service Routine for HPET I/O-APIC-generated interrupts.
   1000  * Used to wakeup CPUs from Deep C-state when their Local APIC Timer stops.
   1001  * This ISR runs on one CPU which pokes other CPUs out of Deep C-state as
   1002  * needed.
   1003  */
   1004 /* ARGSUSED */
   1005 static uint_t
   1006 hpet_isr(char *arg)
   1007 {
   1008 	uint64_t	timer_status;
   1009 	uint64_t	timer_mask;
   1010 	ulong_t		intr, dead_count = 0;
   1011 	hrtime_t	dead = gethrtime() + hpet_isr_spin_timeout;
   1012 
   1013 	timer_mask = HPET_INTR_STATUS_MASK(hpet_info.cstate_timer.timer);
   1014 
   1015 	/*
   1016 	 * We are using a level-triggered interrupt.
   1017 	 * HPET sets timer's General Interrupt Status Register bit N.
   1018 	 * ISR checks this bit to see if it needs servicing.
   1019 	 * ISR then clears this bit by writing 1 to that bit.
   1020 	 */
   1021 	timer_status = hpet_read_gen_intrpt_stat(&hpet_info);
   1022 	if (!(timer_status & timer_mask))
   1023 		return (DDI_INTR_UNCLAIMED);
   1024 	hpet_write_gen_intrpt_stat(&hpet_info, timer_mask);
   1025 
   1026 	/*
   1027 	 * Do not touch ISR data structures before checking the HPET's General
   1028 	 * Interrupt Status register.  The General Interrupt Status register
   1029 	 * will not be set by hardware until after timer interrupt generation
   1030 	 * is enabled by software.  Software allocates necessary data
   1031 	 * structures before enabling timer interrupts.  ASSERT the software
   1032 	 * data structures required to handle this interrupt are initialized.
   1033 	 */
   1034 	ASSERT(hpet_proxy_users != NULL);
   1035 
   1036 	/*
   1037 	 * CPUs in deep c-states do not enable interrupts until after
   1038 	 * performing idle cleanup which includes descheduling themselves from
   1039 	 * the HPET.  The CPU running this ISR will NEVER find itself in the
   1040 	 * proxy list.  A lost wakeup may occur if this is false.
   1041 	 */
   1042 	ASSERT(hpet_proxy_users[CPU->cpu_id] == HPET_INFINITY);
   1043 
   1044 	/*
   1045 	 * Higher level interrupts may deadlock with CPUs going idle if this
   1046 	 * ISR is prempted while holding hpet_proxy_lock.
   1047 	 */
   1048 	intr = intr_clear();
   1049 	while (!mutex_tryenter(&hpet_proxy_lock)) {
   1050 		/*
   1051 		 * spin
   1052 		 */
   1053 		intr_restore(intr);
   1054 		if (dead_count++ > hpet_spin_check) {
   1055 			dead_count = 0;
   1056 			if (gethrtime() > dead) {
   1057 				hpet_expire_all();
   1058 				return (DDI_INTR_CLAIMED);
   1059 			}
   1060 		}
   1061 		intr = intr_clear();
   1062 	}
   1063 	(void) hpet_guaranteed_schedule(HPET_INFINITY);
   1064 	mutex_exit(&hpet_proxy_lock);
   1065 	intr_restore(intr);
   1066 
   1067 	return (DDI_INTR_CLAIMED);
   1068 }
   1069 
   1070 /*
   1071  * Used when disabling the HPET Timer interrupt.  CPUs in Deep C-state must be
   1072  * woken up because they can no longer rely on the HPET's Timer to wake them.
   1073  * We do not need to wait for CPUs to wakeup.
   1074  */
   1075 static void
   1076 hpet_expire_all(void)
   1077 {
   1078 	processorid_t	id;
   1079 
   1080 	for (id = 0; id < max_ncpus; ++id) {
   1081 		if (hpet_proxy_users[id] != HPET_INFINITY) {
   1082 			hpet_proxy_users[id] = HPET_INFINITY;
   1083 			if (id != CPU->cpu_id)
   1084 				poke_cpu(id);
   1085 		}
   1086 	}
   1087 }
   1088 
   1089 /*
   1090  * To avoid missed wakeups this function must guarantee either the HPET timer
   1091  * was successfully programmed to the next expire time or there are no waiting
   1092  * CPUs.
   1093  *
   1094  * Callers cannot enter C2 or deeper if the HPET could not be programmed to
   1095  * generate its next interrupt to happen at required_wakeup_time or sooner.
   1096  * Returns B_TRUE if the HPET was programmed to interrupt by
   1097  * required_wakeup_time, B_FALSE if not.
   1098  */
   1099 static boolean_t
   1100 hpet_guaranteed_schedule(hrtime_t required_wakeup_time)
   1101 {
   1102 	hrtime_t	now, next_proxy_time;
   1103 	processorid_t	id, next_proxy_id;
   1104 	int		proxy_timer = hpet_info.cstate_timer.timer;
   1105 	boolean_t	done = B_FALSE;
   1106 
   1107 	ASSERT(mutex_owned(&hpet_proxy_lock));
   1108 
   1109 	/*
   1110 	 * Loop until we successfully program the HPET,
   1111 	 * or no CPUs are scheduled to use the HPET as a proxy.
   1112 	 */
   1113 	do {
   1114 		/*
   1115 		 * Wake all CPUs that expired before now.
   1116 		 * Find the next CPU to wake up and next HPET program time.
   1117 		 */
   1118 		now = gethrtime();
   1119 		next_proxy_time = HPET_INFINITY;
   1120 		next_proxy_id = CPU->cpu_id;
   1121 		for (id = 0; id < max_ncpus; ++id) {
   1122 			if (hpet_proxy_users[id] < now) {
   1123 				hpet_proxy_users[id] = HPET_INFINITY;
   1124 				if (id != CPU->cpu_id)
   1125 					poke_cpu(id);
   1126 			} else if (hpet_proxy_users[id] < next_proxy_time) {
   1127 				next_proxy_time = hpet_proxy_users[id];
   1128 				next_proxy_id = id;
   1129 			}
   1130 		}
   1131 
   1132 		if (next_proxy_time == HPET_INFINITY) {
   1133 			done = B_TRUE;
   1134 			/*
   1135 			 * There are currently no CPUs using the HPET's Timer
   1136 			 * as a proxy for their LAPIC Timer.  The HPET's Timer
   1137 			 * does not need to be programmed.
   1138 			 *
   1139 			 * Letting the HPET timer wrap around to the current
   1140 			 * time is the longest possible timeout.
   1141 			 * A 64-bit timer will wrap around in ~ 2^44 seconds.
   1142 			 * A 32-bit timer will wrap around in ~ 2^12 seconds.
   1143 			 *
   1144 			 * Disabling the HPET's timer interrupt requires a
   1145 			 * (relatively expensive) write to the HPET.
   1146 			 * Instead we do nothing.
   1147 			 *
   1148 			 * We are gambling some CPU will attempt to enter a
   1149 			 * deep c-state before the timer wraps around.
   1150 			 * We assume one spurious interrupt in a little over an
   1151 			 * hour has less performance impact than writing to the
   1152 			 * HPET's timer disable bit every time all CPUs wakeup
   1153 			 * from deep c-state.
   1154 			 */
   1155 
   1156 		} else {
   1157 			/*
   1158 			 * Idle CPUs disable interrupts before programming the
   1159 			 * HPET to prevent a lost wakeup if the HPET
   1160 			 * interrupts the idle cpu before it can enter a
   1161 			 * Deep C-State.
   1162 			 */
   1163 			if (hpet_timer_program(&hpet_info, proxy_timer,
   1164 			    HRTIME_TO_HPET_TICKS(next_proxy_time - gethrtime()))
   1165 			    != AE_OK) {
   1166 				/*
   1167 				 * We could not program the HPET to wakeup the
   1168 				 * next CPU.  We must wake the CPU ourself to
   1169 				 * avoid a lost wakeup.
   1170 				 */
   1171 				hpet_proxy_users[next_proxy_id] = HPET_INFINITY;
   1172 				if (next_proxy_id != CPU->cpu_id)
   1173 					poke_cpu(next_proxy_id);
   1174 			} else {
   1175 				done = B_TRUE;
   1176 			}
   1177 		}
   1178 
   1179 	} while (!done);
   1180 
   1181 	return (next_proxy_time <= required_wakeup_time);
   1182 }
   1183 
   1184 /*
   1185  * Use an HPET timer to act as this CPU's proxy local APIC timer.
   1186  * Used in deep c-states C2 and above while the CPU's local APIC timer stalls.
   1187  * Called by the idle thread with interrupts enabled.
   1188  * Always returns with interrupts disabled.
   1189  *
   1190  * There are 3 possible outcomes from this function:
   1191  * 1. The Local APIC Timer was already disabled before this function was called.
   1192  *	LAPIC TIMER	: disabled
   1193  *	HPET		: not scheduled to wake this CPU
   1194  *	*lapic_expire	: (hrtime_t)HPET_INFINITY
   1195  *	Returns		: B_TRUE
   1196  * 2. Successfully programmed the HPET to act as a LAPIC Timer proxy.
   1197  *	LAPIC TIMER	: disabled
   1198  *	HPET		: scheduled to wake this CPU
   1199  *	*lapic_expire	: hrtime_t when LAPIC timer would have expired
   1200  *	Returns		: B_TRUE
   1201  * 3. Failed to programmed the HPET to act as a LAPIC Timer proxy.
   1202  *	LAPIC TIMER	: enabled
   1203  *	HPET		: not scheduled to wake this CPU
   1204  *	*lapic_expire	: (hrtime_t)HPET_INFINITY
   1205  *	Returns		: B_FALSE
   1206  *
   1207  * The idle thread cannot enter Deep C-State in case 3.
   1208  * The idle thread must re-enable & re-program the LAPIC_TIMER in case 2.
   1209  */
   1210 static boolean_t
   1211 hpet_use_hpet_timer(hrtime_t *lapic_expire)
   1212 {
   1213 	extern hrtime_t	apic_timer_stop_count(void);
   1214 	extern void	apic_timer_restart(hrtime_t);
   1215 	hrtime_t	now, expire, dead;
   1216 	uint64_t	lapic_count, dead_count;
   1217 	cpupart_t	*cpu_part;
   1218 	processorid_t	cpu_sid;
   1219 	processorid_t	cpu_id = CPU->cpu_id;
   1220 	processorid_t	id;
   1221 	boolean_t	rslt;
   1222 	boolean_t	hset_update;
   1223 
   1224 	cpu_part = CPU->cpu_part;
   1225 	cpu_sid = CPU->cpu_seqid;
   1226 
   1227 	ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
   1228 
   1229 	/*
   1230 	 * A critical section exists between when the HPET is programmed
   1231 	 * to interrupt the CPU and when this CPU enters an idle state.
   1232 	 * Interrupts must be blocked during that time to prevent lost
   1233 	 * CBE wakeup interrupts from either LAPIC or HPET.
   1234 	 *
   1235 	 * Must block interrupts before acquiring hpet_proxy_lock to prevent
   1236 	 * a deadlock with the ISR if the ISR runs on this CPU after the
   1237 	 * idle thread acquires the mutex but before it clears interrupts.
   1238 	 */
   1239 	ASSERT(!interrupts_enabled());
   1240 	lapic_count = apic_timer_stop_count();
   1241 	now = gethrtime();
   1242 	dead = now + hpet_idle_spin_timeout;
   1243 	*lapic_expire = expire = now + lapic_count;
   1244 	if (lapic_count == (hrtime_t)-1) {
   1245 		/*
   1246 		 * LAPIC timer is currently disabled.
   1247 		 * Will not use the HPET as a LAPIC Timer proxy.
   1248 		 */
   1249 		*lapic_expire = (hrtime_t)HPET_INFINITY;
   1250 		return (B_TRUE);
   1251 	}
   1252 
   1253 	/*
   1254 	 * Serialize hpet_proxy data structure manipulation.
   1255 	 */
   1256 	dead_count = 0;
   1257 	while (!mutex_tryenter(&hpet_proxy_lock)) {
   1258 		/*
   1259 		 * spin
   1260 		 */
   1261 		apic_timer_restart(expire);
   1262 		sti();
   1263 		cli();
   1264 
   1265 		if (dead_count++ > hpet_spin_check) {
   1266 			dead_count = 0;
   1267 			hset_update = (((CPU->cpu_flags & CPU_OFFLINE) == 0) &&
   1268 			    (ncpus > 1));
   1269 			if (hset_update &&
   1270 			    !bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
   1271 				*lapic_expire = (hrtime_t)HPET_INFINITY;
   1272 				return (B_FALSE);
   1273 			}
   1274 		}
   1275 
   1276 		lapic_count = apic_timer_stop_count();
   1277 		now = gethrtime();
   1278 		*lapic_expire = expire = now + lapic_count;
   1279 		if (lapic_count == (hrtime_t)-1) {
   1280 			/*
   1281 			 * LAPIC timer is currently disabled.
   1282 			 * Will not use the HPET as a LAPIC Timer proxy.
   1283 			 */
   1284 			*lapic_expire = (hrtime_t)HPET_INFINITY;
   1285 			return (B_TRUE);
   1286 		}
   1287 		if (now > dead) {
   1288 			apic_timer_restart(expire);
   1289 			*lapic_expire = (hrtime_t)HPET_INFINITY;
   1290 			return (B_FALSE);
   1291 		}
   1292 	}
   1293 
   1294 	if ((hpet_state.cpr == B_TRUE) ||
   1295 	    (hpet_state.cpu_deep_idle == B_FALSE) ||
   1296 	    (hpet_state.proxy_installed == B_FALSE) ||
   1297 	    (hpet_state.uni_cstate == B_TRUE)) {
   1298 		mutex_exit(&hpet_proxy_lock);
   1299 		apic_timer_restart(expire);
   1300 		*lapic_expire = (hrtime_t)HPET_INFINITY;
   1301 		return (B_FALSE);
   1302 	}
   1303 
   1304 	hpet_proxy_users[cpu_id] = expire;
   1305 
   1306 	/*
   1307 	 * We are done if another cpu is scheduled on the HPET with an
   1308 	 * expire time before us.  The next HPET interrupt has been programmed
   1309 	 * to fire before our expire time.
   1310 	 */
   1311 	for (id = 0; id < max_ncpus; ++id) {
   1312 		if ((hpet_proxy_users[id] <= expire) && (id != cpu_id)) {
   1313 			mutex_exit(&hpet_proxy_lock);
   1314 			return (B_TRUE);
   1315 		}
   1316 	}
   1317 
   1318 	/*
   1319 	 * We are the next lAPIC to expire.
   1320 	 * Program the HPET with our expire time.
   1321 	 */
   1322 	rslt = hpet_guaranteed_schedule(expire);
   1323 	mutex_exit(&hpet_proxy_lock);
   1324 
   1325 	if (rslt == B_FALSE) {
   1326 		apic_timer_restart(expire);
   1327 		*lapic_expire = (hrtime_t)HPET_INFINITY;
   1328 	}
   1329 
   1330 	return (rslt);
   1331 }
   1332 
   1333 /*
   1334  * Called by the idle thread when waking up from Deep C-state before enabling
   1335  * interrupts.  With an array data structure it is faster to always remove
   1336  * ourself from the array without checking if the HPET ISR already removed.
   1337  *
   1338  * We use a lazy algorithm for removing CPUs from the HPET's schedule.
   1339  * We do not reprogram the HPET here because this CPU has real work to do.
   1340  * On a idle system the CPU was probably woken up by the HPET's ISR.
   1341  * On a heavily loaded system CPUs are not going into Deep C-state.
   1342  * On a moderately loaded system another CPU will usually enter Deep C-state
   1343  * and reprogram the HPET before the HPET fires with our wakeup.
   1344  */
   1345 static void
   1346 hpet_use_lapic_timer(hrtime_t expire)
   1347 {
   1348 	extern void	apic_timer_restart(hrtime_t);
   1349 	processorid_t	cpu_id = CPU->cpu_id;
   1350 
   1351 	ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
   1352 	ASSERT(!interrupts_enabled());
   1353 
   1354 	hpet_proxy_users[cpu_id] = HPET_INFINITY;
   1355 
   1356 	/*
   1357 	 * Do not enable a LAPIC Timer that was initially disabled.
   1358 	 */
   1359 	if (expire != HPET_INFINITY)
   1360 		apic_timer_restart(expire);
   1361 }
   1362 
   1363 /*
   1364  * Initialize data structure to keep track of CPUs using HPET as a proxy for
   1365  * their stalled local APIC timer.  For now this is just an array.
   1366  */
   1367 static void
   1368 hpet_init_proxy_data(void)
   1369 {
   1370 	processorid_t	id;
   1371 
   1372 	/*
   1373 	 * Use max_ncpus for hot plug compliance.
   1374 	 */
   1375 	hpet_proxy_users = kmem_zalloc(max_ncpus * sizeof (*hpet_proxy_users),
   1376 	    KM_SLEEP);
   1377 
   1378 	/*
   1379 	 * Unused entries always contain HPET_INFINITY.
   1380 	 */
   1381 	for (id = 0; id < max_ncpus; ++id)
   1382 		hpet_proxy_users[id] = HPET_INFINITY;
   1383 }
   1384