Home | History | Annotate | Download | only in cpu
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #if !defined(lint)
     27 #include "assym.h"
     28 #endif	/* !lint */
     29 
     30 /*
     31  * General assembly language routines.
     32  * It is the intent of this file to contain routines that are
     33  * specific to cpu architecture.
     34  */
     35 
     36 /*
     37  * WARNING: If you add a fast trap handler which can be invoked by a
     38  * non-privileged user, you may have to use the FAST_TRAP_DONE macro
     39  * instead of "done" instruction to return back to the user mode. See
     40  * comments for the "fast_trap_done" entry point for more information.
     41  */
     42 #define	FAST_TRAP_DONE	\
     43 	ba,a	fast_trap_done	/* annulled branch: no delay-slot insn runs */
     44 
     45 /*
     46  * Override GET_NATIVE_TIME for the cpu module code.  This is not
     47  * guaranteed to be exactly one instruction, be careful of using
     48  * the macro in delay slots.
     49  *
     50  * Do not use any instruction that modifies condition codes as the
     51  * caller may depend on these to remain unchanged across the macro.
     52  */
     53 #if defined(CHEETAH) || defined(OLYMPUS_C)
     54 
     55 #define	GET_NATIVE_TIME(out, scr1, scr2) \
     56 	rd	STICK, out		/* out = STICK; scr1/scr2 unused */
     57 #define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
     58 	rd	STICK, reg;		/* reg = current STICK value */ \
     59 	add	reg, delta, reg;	/* reg += delta */ \
     60 	wr	reg, STICK		/* store the adjusted value back */
     61 #define	RD_TICKCMPR(out, scr)		\
     62 	rd	STICK_COMPARE, out	/* out = STICK_COMPARE; scr unused */
     63 #define	WR_TICKCMPR(in, scr1, scr2, label) \
     64 	wr	in, STICK_COMPARE	/* scratch regs and label unused */
     65 
     66 #elif defined(HUMMINGBIRD)
     67 #include <sys/spitregs.h>
     68 
     69 /*
     70  * the current hummingbird version of %stick and %stick_cmp
     71  * were both implemented as (2) 32-bit locations in ASI_IO space;
     72  * the hdwr should support atomic r/w; meanwhile: ugly alert! ...
     73  *
     74  * 64-bit opcodes are required, but move only 32-bits:
     75  *
     76  * ldxa [phys]ASI_IO, %dst 	reads  the low 32-bits from phys into %dst
     77  * stxa %src, [phys]ASI_IO 	writes the low 32-bits from %src into phys
     78  *
     79  * reg equivalent		[phys]ASI_IO
     80  * ------------------		---------------
     81  * %stick_cmp  low-32		0x1FE.0000.F060
     82  * %stick_cmp high-32		0x1FE.0000.F068
     83  * %stick      low-32		0x1FE.0000.F070
     84  * %stick     high-32		0x1FE.0000.F078
     85  */
     86 #define	HSTC_LOW	0x60			/* stick_cmp low  32-bits */
     87 #define	HSTC_HIGH	0x68			/* stick_cmp high 32-bits */
     88 #define	HST_LOW		0x70			/* stick low  32-bits */
     89 #define	HST_HIGH	0x78			/* stick high 32-bits */
     90 #define	HST_DIFF	0x08			/* low<-->high diff */
     91 
     92 /*
     93  * Build the 41-bit physical address 0x1FE.0000.F0xx in reg, where xx is
     94  * the byte offset passed in (one of the HST*_ values above).
         *
         * Any change in the number of instructions in SETL41()
         * will affect SETL41_OFF
     95  */
     96 #define	SETL41(reg, byte) \
     97 	sethi	%hi(0x1FE00000), reg;		/* 0000.0000.1FE0.0000 */ \
     98 	or	reg, 0xF, reg;			/* 0000.0000.1FE0.000F */ \
     99 	sllx	reg, 12, reg;			/* 0000.01FE.0000.F000 */ \
    100 	or	reg, byte, reg;			/* 0000.01FE.0000.F0xx */
    101 
    102 /*
    103  * SETL41_OFF is used to calculate the relative PC value when a
    104  * branch instruction needs to go over SETL41() macro
    105  */
    106 #define SETL41_OFF  16	/* 4 instructions in SETL41() * 4 bytes each */
    107 
    108 /*
    109  * reading stick requires 2 loads, and there could be an intervening
    110  * low-to-high 32-bit rollover resulting in a return value that is
    111  * off by about (2 ^ 32); this rare case is prevented by re-reading
    112  * the low-32 bits after the high-32 and verifying the "after" value
    113  * is >= the "before" value; if not, increment the high-32 value.
    114  *
    115  * this method is limited to 1 rollover, and based on the fixed
    116  * stick-frequency (5555555), requires the loads to complete within
    117  * 773 seconds; incrementing the high-32 value will not overflow for
    118  * about 52644 years.
    119  *
    120  * writing stick requires 2 stores; if the old/new low-32 value is
    121  * near 0xffffffff, there could be another rollover (also rare).
    122  * to prevent this, we first write a 0 to the low-32, then write
    123  * new values to the high-32 then the low-32.
    124  *
    125  * When we detect a carry in the lower %stick register, we need to
    126  * read HST_HIGH again. However, at the point where we detect this,
    127  * we need to rebuild the register address HST_HIGH. This involves more
    128  * than one instruction, so a branch is unavoidable. However, most of
    129  * the time there is no carry, so we take the penalty of a branch
    130  * instruction only when there is a carry (less frequent).
    131  *
    132  * For GET_NATIVE_TIME(), we start afresh and branch to SETL41().
    133  * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
    134  * addr already points to HST_LOW.
    135  *
    136  * NOTE: this method requires disabling interrupts before using
    137  * DELTA_NATIVE_TIME.
    138  */
    139 #define	GET_NATIVE_TIME(out, scr, tmp)	\
    140 	SETL41(scr, HST_LOW);		/* scr = address of low half */ \
    141 	ldxa	[scr]ASI_IO, tmp;	/* tmp = low-32 "before" */ \
    142 	inc	HST_DIFF, scr;		/* scr -> high half */ \
    143 	ldxa	[scr]ASI_IO, out;	/* out = high-32 */ \
    144 	dec	HST_DIFF, scr;		/* scr -> low half again */ \
    145 	ldxa	[scr]ASI_IO, scr;	/* scr = low-32 "after" */ \
    146 	sub	scr, tmp, tmp;		/* tmp = after - before */ \
    147 	brlz,pn tmp, .-(SETL41_OFF+24); /* rolled over: redo from SETL41 */ \
    148 	sllx	out, 32, out;		/* delay: high-32 into bits 63:32 */ \
    149 	or	out, scr, out		/* out = high:low */
    150 #define	DELTA_NATIVE_TIME(delta, addr, high, low, tmp) \
    151 	SETL41(addr, HST_LOW);		/* addr = address of low half */ \
    152 	ldxa	[addr]ASI_IO, tmp;	/* tmp = low-32 "before" */ \
    153 	inc	HST_DIFF, addr;		/* addr -> high half */ \
    154 	ldxa	[addr]ASI_IO, high;	/* high = high-32 */ \
    155 	dec	HST_DIFF, addr;		/* addr -> low half again */ \
    156 	ldxa	[addr]ASI_IO, low;	/* low = low-32 "after" */ \
    157 	sub	low, tmp, tmp;		/* tmp = after - before */ \
    158 	brlz,pn tmp, .-24;		/* rolled over: redo the loads */ \
    159 	sllx	high, 32, high;		/* delay: high-32 into bits 63:32 */ \
    160 	or	high, low, high;	/* high = full 64-bit stick */ \
    161 	add	high, delta, high;	/* apply delta */ \
    162 	srl	high, 0, low;		/* low = new low-32 */ \
    163 	srlx	high, 32, high;		/* high = new high-32 */ \
    164 	stxa	%g0, [addr]ASI_IO;	/* zero the low half first */ \
    165 	inc	HST_DIFF, addr;		/* addr -> high half */ \
    166 	stxa	high, [addr]ASI_IO;	/* write new high-32 */ \
    167 	dec	HST_DIFF, addr;		/* addr -> low half */ \
    168 	stxa	low, [addr]ASI_IO	/* write new low-32 */
    169 #define RD_TICKCMPR(out, scr)		\
    170 	SETL41(scr, HSTC_LOW);		/* scr = address of stick_cmp low */ \
    171 	ldxa	[scr]ASI_IO, out;	/* out = low-32 */ \
    172 	inc	HST_DIFF, scr;		/* scr -> high half */ \
    173 	ldxa	[scr]ASI_IO, scr;	/* scr = high-32 */ \
    174 	sllx	scr, 32, scr;		/* move into bits 63:32 */ \
    175 	or	scr, out, out		/* out = high:low */
    176 #define WR_TICKCMPR(in, scra, scrd, label) \
    177 	SETL41(scra, HSTC_HIGH);	/* scra = address of stick_cmp high */ \
    178 	srlx	in, 32, scrd;		/* scrd = high-32 of in */ \
    179 	stxa	scrd, [scra]ASI_IO;	/* write high half */ \
    180 	dec	HST_DIFF, scra;		/* scra -> low half */ \
    181 	stxa	in, [scra]ASI_IO	/* write low-32 of in; label unused */
    182 
    183 #else	/* !CHEETAH && !HUMMINGBIRD */
    184 
    185 #define	GET_NATIVE_TIME(out, scr1, scr2) \
    186 	rdpr	%tick, out		/* out = %tick; scr1/scr2 unused */
    187 #define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
    188 	rdpr	%tick, reg;		/* reg = current %tick */ \
    189 	add	reg, delta, reg;	/* reg += delta */ \
    190 	wrpr	reg, %tick		/* write the adjusted value back */
    191 #define	RD_TICKCMPR(out, scr)		\
    192 	rd	TICK_COMPARE, out	/* out = TICK_COMPARE; scr unused */
    193 #ifdef BB_ERRATA_1 /* writes to TICK_COMPARE may fail */
    194 /*
    195  * Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
    196  * The failure occurs only when the following instruction decodes to wr or
    197  * wrpr.  The workaround is to immediately follow writes to TICK_COMPARE
    198  * with a read, thus stalling the pipe and keeping following instructions
    199  * from causing data corruption.  Aligning to a quadword will ensure these
    200  * two instructions are not split due to i$ misses.
         *
         * The label argument makes the local label unique per expansion; the
         * callers in this file pass __LINE__.  The annulled branch skips whatever
         * padding the .align directive inserts.
    201  */
    202 #define WR_TICKCMPR(cmpr,scr1,scr2,label)	\
    203 	ba,a	.bb_errata_1.label		;\
    204 	.align	64				;\
    205 .bb_errata_1.label:				;\
    206 	wr	cmpr, TICK_COMPARE		;\
    207 	rd	TICK_COMPARE, %g0	/* read back; result discarded in %g0 */
    208 #else	/* BB_ERRATA_1 */
    209 #define	WR_TICKCMPR(in,scr1,scr2,label)		\
    210 	wr	in, TICK_COMPARE	/* scratch regs and label unused */
    211 #endif	/* BB_ERRATA_1 */
    212 
    213 #endif	/* !CHEETAH && !HUMMINGBIRD */
    214 
    215 #include <sys/clock.h>
    216 
    217 #if defined(lint)
    218 #include <sys/types.h>
    219 #include <sys/scb.h>
    220 #include <sys/systm.h>
    221 #include <sys/regset.h>
    222 #include <sys/sunddi.h>
    223 #include <sys/lockstat.h>
    224 #endif	/* lint */
    225 
    226 
    227 #include <sys/asm_linkage.h>
    228 #include <sys/privregs.h>
    229 #include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
    230 #include <sys/machthread.h>
    231 #include <sys/clock.h>
    232 #include <sys/intreg.h>
    233 #include <sys/psr_compat.h>
    234 #include <sys/isa_defs.h>
    235 #include <sys/dditypes.h>
    236 #include <sys/intr.h>
    237 
    238 #if !defined(lint)
    239 #include "assym.h"
    240 #endif	/* !lint */
    241 
    242 #if defined(lint)
    243 
    244 uint_t
    245 get_impl(void)
    246 { return (0); }		/* lint-only stub; real routine is below */
    247 
    248 #else	/* lint */
    249 
    250 	ENTRY(get_impl)
    251 	GET_CPU_IMPL(%o0)		! macro defined outside this file;
    252 	retl				!   whatever it leaves in %o0 is
    253 	nop				!   the return value
    254 	SET_SIZE(get_impl)
    255 
    256 #endif	/* lint */
    257 
    258 #if defined(lint)
    259 /*
    260  * Softint generated when counter field of tick reg matches value field
    261  * of tick_cmpr reg
         *
         * tickcmpr_set() programs the tick compare register with clock_cycles.
         * After each write it re-reads the current time; if the value written
         * is not greater than the current time, it retries with (now + step),
         * doubling step on every lap (the first retry uses 16).
    262  */
    263 /*ARGSUSED*/
    264 void
    265 tickcmpr_set(uint64_t clock_cycles)
    266 {}
    267 
    268 #else	/* lint */
    269 
    270 	ENTRY_NP(tickcmpr_set)
    271 	! get 64-bit clock_cycles interval
    272 	mov	%o0, %o2		! %o2 = value to program
    273 	mov	8, %o3			! A reasonable initial step size
    274 1:
    275 	WR_TICKCMPR(%o2,%o4,%o5,__LINE__)	! Write to TICK_CMPR
    276 
    277 	GET_NATIVE_TIME(%o0, %o4, %o5)	! Read %tick to confirm the
    278 	sllx	%o0, 1, %o0		!   value we wrote was in the future.
    279 	srlx	%o0, 1, %o0		!   (the shift pair clears bit 63)
    280 
    281 	cmp	%o2, %o0		! If the value we wrote was in the
    282 	bg,pt	%xcc, 2f		!   future, then blow out of here.
    283 	sllx	%o3, 1, %o3		! If not, then double our step size,
    284 	ba,pt	%xcc, 1b		!   and take another lap.
    285 	add	%o0, %o3, %o2		! delay: next value = now + step
    286 2:
    287 	retl
    288 	nop
    289 	SET_SIZE(tickcmpr_set)
    290 
    291 #endif	/* lint */
    292 
    293 #if defined(lint)
    294 
    295 void
    296 tickcmpr_disable(void)
    297 {}			/* lint-only stub; real routine is below */
    298 
    299 #else	/* lint */
    300 
    301 	ENTRY_NP(tickcmpr_disable)
    302 	mov	1, %g1
    303 	sllx	%g1, TICKINT_DIS_SHFT, %o0	! %o0 = 1 << TICKINT_DIS_SHFT
    304 	WR_TICKCMPR(%o0,%o4,%o5,__LINE__)	! Write to TICK_CMPR
    305 	retl
    306 	nop
    307 	SET_SIZE(tickcmpr_disable)
    308 
    309 #endif	/* lint */
    310 
    310 #if defined(lint)
    311 
    312 /*
    313  * tick_write_delta() increments %tick by the specified delta.  This should
    314  * only be called after a CPR event to assure that gethrtime() continues to
    315  * increase monotonically.  Obviously, writing %tick needs to be done very
    316  * carefully to avoid introducing unnecessary %tick skew across CPUs.  For
    317  * this reason, we make sure we're i-cache hot before actually writing to
    318  * %tick.
         *
         * The caller must enter with interrupts enabled.  Interrupts are turned
         * off by writing the saved %pstate with PSTATE_IE as the second wrpr
         * operand; wrpr XORs its operands, so this clears the bit only if it was
         * set on entry.  DEBUG kernels check that and panic otherwise.  The
         * saved %pstate is restored in the delay slot of the return.
    319  */
    320 /*ARGSUSED*/
    321 void
    322 tick_write_delta(uint64_t delta)
    323 {}
    324 
    325 #else	/* lint */
    326 
    327 #ifdef DEBUG
    328 	.seg	".text"
    329 tick_write_panic:
    330 	.asciz	"tick_write_delta: interrupts already disabled on entry"
    331 #endif	/* DEBUG */
    332 
    333 	ENTRY_NP(tick_write_delta)
    334 	rdpr	%pstate, %g1		! %g1 = %pstate on entry
    335 #ifdef DEBUG
    336 	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
    337 	bnz	0f			! aren't already disabled.
    338 	sethi	%hi(tick_write_panic), %o1	! delay: becomes %i1 after save
    339         save    %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
    340 	call	panic
    341 	or	%i1, %lo(tick_write_panic), %o0
    342 #endif	/* DEBUG */
    343 0:	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
    344 	mov	%o0, %o2		! %o2 = delta
    345 	ba	0f			! Branch to cache line-aligned instr.
    346 	nop
    347 	.align	16
    348 0:	nop				! The next 3 instructions are now hot.
    349 	DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2)	! read/inc/write %tick
    350 
    351 	retl				! Return
    352 	wrpr	%g0, %g1, %pstate	!     delay: Re-enable interrupts
        	SET_SIZE(tick_write_delta)
    353 #endif	/* lint */
    355 
    355 #if defined(lint)
    356 /*
    357  *  return 1 if disabled
         *
         *  The assembly returns the tick compare value shifted right (logically)
         *  by TICKINT_DIS_SHFT, so the result is zero when no bit at or above
         *  that position is set.
    358  */
    359 
    360 int
    361 tickcmpr_disabled(void)
    362 { return (0); }
    363 
    364 #else	/* lint */
    365 
    366 	ENTRY_NP(tickcmpr_disabled)
    367 	RD_TICKCMPR(%g1, %o0)		! %g1 = tick compare; %o0 is scratch
    368 	retl
    369 	srlx	%g1, TICKINT_DIS_SHFT, %o0	! delay: compute return value
    370 	SET_SIZE(tickcmpr_disabled)
    371 
    372 #endif	/* lint */
    374 
    375 /*
    376  * Get current tick
    377  */
    378 #if defined(lint)
    379 
    380 u_longlong_t
    381 gettick(void)
    382 { return (0); }
    383 
    384 #else	/* lint */
    385 
    386 	ENTRY(gettick)
    387 	GET_NATIVE_TIME(%o0, %o2, %o3)
    388 	retl
    389 	nop
    390 	SET_SIZE(gettick)
    391 
    392 #endif	/* lint */
    393 
    394 
    395 /*
    396  * Return the counter portion of the tick register.
    397  */
    398 
    399 #if defined(lint)
    400 
    401 uint64_t
    402 gettick_counter(void)
    403 { return(0); }
    404 
    405 #else	/* lint */
    406 
    407 	ENTRY_NP(gettick_counter)
    408 	rdpr	%tick, %o0
    409 	sllx	%o0, 1, %o0
    410 	retl
    411 	srlx	%o0, 1, %o0		! shake off npt bit
    412 	SET_SIZE(gettick_counter)
    413 #endif	/* lint */
    414 
    415 /*
    416  * Provide a C callable interface to the trap that reads the hi-res timer.
    417  * Returns 64-bit nanosecond timestamp in %o0 and %o1.
    418  */
    419 
    420 #if defined(lint)
    421 
    422 hrtime_t
    423 gethrtime(void)
    424 {
    425 	return ((hrtime_t)0);
    426 }
    427 
    428 hrtime_t
    429 gethrtime_unscaled(void)
    430 {
    431 	return ((hrtime_t)0);
    432 }
    433 
    434 hrtime_t
    435 gethrtime_max(void)
    436 {
    437 	return ((hrtime_t)0);
    438 }
    439 
    440 void
    441 scalehrtime(hrtime_t *hrt)
    442 {
    443 	*hrt = 0;
    444 }
    445 
    446 void
    447 gethrestime(timespec_t *tp)
    448 {
    449 	tp->tv_sec = 0;
    450 	tp->tv_nsec = 0;
    451 }
    452 
    453 time_t
    454 gethrestime_sec(void)
    455 {
    456 	return (0);
    457 }
    458 
    459 void
    460 gethrestime_lasttick(timespec_t *tp)
    461 {
    462 	tp->tv_sec = 0;
    463 	tp->tv_nsec = 0;
    464 }
    465 
    466 /*ARGSUSED*/
    467 void
    468 hres_tick(void)
    469 {
    470 }
    471 
    472 void
    473 panic_hres_tick(void)
    474 {
    475 }
    476 
    477 #else	/* lint */
    478 
    478 	ENTRY_NP(gethrtime)
    479 	GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
    480 							! %g1 = hrtime
    481 	retl
    482 	mov	%g1, %o0		! delay: return hrtime in %o0
    483 	SET_SIZE(gethrtime)
    485 
    485 	ENTRY_NP(gethrtime_unscaled)
    486 	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
    487 	retl
    488 	mov	%g1, %o0		! delay: return it without conversion
    489 	SET_SIZE(gethrtime_unscaled)
    491 
    491 	ENTRY_NP(gethrtime_waitfree)
    492 	ALTENTRY(dtrace_gethrtime)	! second name for the same code
    493 	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
    494 	NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)	! convert %g1 in place
    495 	retl
    496 	mov	%g1, %o0		! delay: return converted value
    497 	SET_SIZE(dtrace_gethrtime)
    498 	SET_SIZE(gethrtime_waitfree)
    500 
    500 	ENTRY(gethrtime_max)
    501 	NATIVE_TIME_MAX(%g1)		! macro defined outside this file
    502 	NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)	! convert %g1 in place
    503 
    504 	! hrtime_t's are signed, max hrtime_t must be positive
    505 	mov	-1, %o2			! %o2 = all ones
    506 	brlz,a	%g1, 1f			! negative? (delay runs only if taken)
    507 	srlx	%o2, 1, %g1		! delay: %g1 = all ones >> 1
    508 1:
    509 	retl
    510 	mov	%g1, %o0		! delay: return %g1
    511 	SET_SIZE(gethrtime_max)
    513 
    513 	ENTRY(scalehrtime)
    514 	ldx	[%o0], %o1		! %o1 = *hrt
    515 	NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)	! convert %o1 in place
    516 	retl
    517 	stx	%o1, [%o0]		! delay: *hrt = converted value
    518 	SET_SIZE(scalehrtime)
    520 
    521 /*
    522  * Fast trap to return a timestamp, uses trap window, leaves traps
    523  * disabled.  Returns a 64-bit nanosecond timestamp in %o0 and %o1.
    524  *
    525  * This is the handler for the ST_GETHRTIME trap.
    526  */
    527 
    528 	ENTRY_NP(get_timestamp)
    529 	GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2)	! %g1 = hrtime
    530 	srlx	%g1, 32, %o0				! %o0 = hi32(%g1)
    531 	srl	%g1, 0, %o1				! %o1 = lo32(%g1)
    532 	FAST_TRAP_DONE
    533 	SET_SIZE(get_timestamp)
    534 
    535 /*
    536  * Macro to convert GET_HRESTIME() bits into a timestamp.
    537  *
    538  * We use two separate macros so that the platform-dependent GET_HRESTIME()
    539  * can be as small as possible; CONV_HRESTIME() implements the generic part.
    540  */
    541 #define	CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
    542 	brz,pt	adj, 3f;		/* no adjustments, it's easy */	\
    543 	add	hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */	\
    544 	brlz,pn	adj, 2f;		/* if hrestime_adj negative */	\
    545 	srlx	nslt, ADJ_SHIFT, nslt;	/* delay: nslt >>= 4 */		\
    546 	subcc	adj, nslt, %g0;		/* hrestime_adj - nslt/16 */	\
    547 	movg	%xcc, nslt, adj;	/* adj by min(adj, nslt/16) */	\
    548 	ba	3f;			/* go convert to sec/nsec */	\
    549 	add	hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
    550 2:	addcc	adj, nslt, %g0;		/* hrestime_adj + nslt/16 */	\
    551 	bge,a,pt %xcc, 3f;		/* is adj less negative? */	\
    552 	add	hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */	\
    553 	sub	hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
    554 3:	cmp	hrestnsec, nano;	/* more than a billion? */	\
    555 	bl,pt	%xcc, 4f;		/* if not, we're done */	\
    556 	nop;				/* delay: do nothing :( */	\
    557 	add	hrestsec, 1, hrestsec;	/* hrest.tv_sec++; */		\
    558 	sub	hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */ \
    559 	ba,a	3b;			/* check >= billion again */	\
    560 4:
    561 
    561 	ENTRY_NP(gethrestime)
    562 	GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
    563 	CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)	! %o1 = sec, %o2 = nsec
    564 	stn	%o1, [%o0]		! first long of *tp = seconds
    565 	retl
    566 	stn	%o2, [%o0 + CLONGSIZE]	! delay: second long = nanoseconds
    567 	SET_SIZE(gethrestime)
    569 
    570 /*
    571  * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
    572  * seconds.
    573  */
    574 	ENTRY_NP(gethrestime_sec)
    575 	GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
    576 	CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
    577 	retl					! %o0 current hrestime seconds
    578 	nop
    579 	SET_SIZE(gethrestime_sec)
    580 
    581 /*
    582  * Returns the hrestime on the last tick.  This is simpler than gethrestime()
    583  * and gethrestime_sec():  no conversion is required.  gethrestime_lasttick()
    584  * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
    585  * outlined in detail in clock.h.  (Unlike GET_HRESTIME/GET_HRTIME, we don't
    586  * rely on load dependencies to effect the membar #LoadLoad, instead declaring
    587  * it explicitly.)
    588  */
    589 	ENTRY_NP(gethrestime_lasttick)
    590 	sethi	%hi(hres_lock), %o1
    591 0:
    592 	lduw	[%o1 + %lo(hres_lock)], %o2	! Load lock value
    593 	membar	#LoadLoad			! Load of lock must complete
    594 	andn	%o2, 1, %o2			! Mask off lowest bit
    595 	ldn	[%o1 + %lo(hrestime)], %g1	! Seconds.
    596 	add	%o1, %lo(hrestime), %o4
    597 	ldn	[%o4 + CLONGSIZE], %g2		! Nanoseconds.
    598 	membar	#LoadLoad			! All loads must complete
    599 	lduw	[%o1 + %lo(hres_lock)], %o3	! Reload lock value
    600 	cmp	%o3, %o2			! If lock is locked or has
    601 	bne	0b				!   changed, retry.
    602 	stn	%g1, [%o0]			! Delay: store seconds
    603 	retl
    604 	stn	%g2, [%o0 + CLONGSIZE]		! Delay: store nanoseconds
    605 	SET_SIZE(gethrestime_lasttick)
    606 
    607 /*
    608  * Fast trap for gettimeofday().  Returns a timestruc_t in %o0 and %o1.
    609  *
    610  * This is the handler for the ST_GETHRESTIME trap.
    611  */
    612 
    613 	ENTRY_NP(get_hrestime)
    614 	GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3)
    615 	CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
    616 	FAST_TRAP_DONE
    617 	SET_SIZE(get_hrestime)
    618 
    619 /*
    620  * Fast trap to return lwp virtual time, uses trap window, leaves traps
    621  * disabled.  Returns a 64-bit number in %o0:%o1, which is the number
    622  * of nanoseconds consumed.
    623  *
    624  * This is the handler for the ST_GETHRVTIME trap.
    625  *
    626  * Register usage:
    627  *	%o0, %o1 = return lwp virtual time
    628  * 	%o2 = CPU/thread
    629  * 	%o3 = lwp
    630  * 	%g1 = scratch
    631  * 	%g5 = scratch
    632  */
    633 	ENTRY_NP(get_virtime)
    634 	GET_NATIVE_TIME(%g5, %g1, %g2)	! %g5 = native time in ticks
    635 	CPU_ADDR(%g2, %g3)			! CPU struct ptr to %g2
    636 	ldn	[%g2 + CPU_THREAD], %g2		! thread pointer to %g2
    637 	ldn	[%g2 + T_LWP], %g3		! lwp pointer to %g3
    638 
    639 	/*
    640 	 * Subtract start time of current microstate from time
    641 	 * of day to get increment for lwp virtual time.
    642 	 */
    643 	ldx	[%g3 + LWP_STATE_START], %g1	! ms_state_start
    644 	sub	%g5, %g1, %g5
    645 
    646 	/*
    647 	 * Add current value of ms_acct[LMS_USER]
    648 	 */
    649 	ldx	[%g3 + LWP_ACCT_USER], %g1	! ms_acct[LMS_USER]
    650 	add	%g5, %g1, %g5
    651 	NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)
    652 
    653 	srl	%g5, 0, %o1			! %o1 = lo32(%g5)
    654 	srlx	%g5, 32, %o0			! %o0 = hi32(%g5)
    655 
    656 	FAST_TRAP_DONE
    657 	SET_SIZE(get_virtime)
    658 
    659 
    660 
    660 	.seg	".text"
    661 hrtime_base_panic:
    662 	.asciz	"hrtime_base stepping back"
    663 
    664 
        /*
         * hres_tick() - advance the time-of-day state under hres_lock.
         *
         * Steps, in order:
         *   1. spin until the hres_lock byte is acquired with ldstub;
         *   2. read the native time, save it in hres_last_tick, and scale it to
         *	nanoseconds with nsec_scale;
         *   3. panic ("hrtime_base stepping back") if the stored hrtime_base is
         *	greater than the new value, otherwise store the new hrtime_base;
         *   4. pick adj: 0 if hrestime_adj is 0, else hrestime_adj limited in
         *	magnitude to (nsec delta >> ADJ_SHIFT);
         *   5. subtract adj from timedelta and store the result in both
         *	timedelta and hrestime_adj;
         *   6. add adj and the nsec delta to hrestime, carrying at most one
         *	second (and setting one_sec to 1 when a carry happens);
         *   7. release the lock by incrementing the hres_lock word.
         */
    665 	ENTRY_NP(hres_tick)
    666 	save	%sp, -SA(MINFRAME), %sp	! get a new window
    667 
    668 	sethi	%hi(hrestime), %l4
    669 	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! try locking
    670 7:	tst	%l5
    671 	bz,pt	%xcc, 8f			! if we got it, drive on
    672 	ld	[%l4 + %lo(nsec_scale)], %l5	! delay: %l5 = scaling factor
    673 	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
    674 9:	tst	%l5				! spin reading the lock byte
    675 	bz,a,pn	%xcc, 7b			! looks free: try ldstub again
    676 	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
    677 	ba,pt	%xcc, 9b			! still held: keep spinning
    678 	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
    679 8:
    680 	membar	#StoreLoad|#StoreStore
    681 
    682 	!
    683 	! update hres_last_tick.  %l5 has the scaling factor (nsec_scale).
    684 	!
    685 	ldx	[%l4 + %lo(hrtime_base)], %g1	! load current hrtime_base
    686 	GET_NATIVE_TIME(%l0, %l3, %l6)		! current native time
    687 	stx	%l0, [%l4 + %lo(hres_last_tick)]! prev = current
    688 	! convert native time to nsecs
    689 	NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)
    690 
    691 	sub	%l0, %g1, %i1			! get accurate nsec delta
    692 
    693 	ldx	[%l4 + %lo(hrtime_base)], %l1
    694 	cmp	%l1, %l0
    695 	bg,pn	%xcc, 9f			! old base > new time: panic
    696 	nop
    697 
    698 	stx	%l0, [%l4 + %lo(hrtime_base)]	! update hrtime_base
    699 
    700 	!
    701 	! apply adjustment, if any
    702 	!
    703 	ldx	[%l4 + %lo(hrestime_adj)], %l0	! %l0 = hrestime_adj
    704 	brz	%l0, 2f
    705 						! hrestime_adj == 0 ?
    706 						! yes, skip adjustments
    707 	clr	%l5				! delay: set adj to zero
    708 	tst	%l0				! is hrestime_adj >= 0 ?
    709 	bge,pt	%xcc, 1f			! yes, go handle positive case
    710 	srl	%i1, ADJ_SHIFT, %l5		! delay: %l5 = adj
    711 
    712 	addcc	%l0, %l5, %g0			! hrestime_adj < -adj ?
    713 	bl,pt	%xcc, 2f			! yes, use current adj
    714 	neg	%l5				! delay: %l5 = -adj
    715 	ba,pt	%xcc, 2f
    716 	mov	%l0, %l5			! no, so set adj = hrestime_adj
    717 1:
    718 	subcc	%l0, %l5, %g0			! hrestime_adj < adj ?
    719 	bl,a,pt	%xcc, 2f			! yes, set adj = hrestime_adj
    720 	mov	%l0, %l5			! delay: adj = hrestime_adj
    721 2:
    722 	ldx	[%l4 + %lo(timedelta)], %l0	! %l0 = timedelta
    723 	sub	%l0, %l5, %l0			! timedelta -= adj
    724 
    725 	stx	%l0, [%l4 + %lo(timedelta)]	! store new timedelta
    726 	stx	%l0, [%l4 + %lo(hrestime_adj)]	! hrestime_adj = timedelta
    727 
    728 	or	%l4, %lo(hrestime), %l2		! %l2 = &hrestime
    729 	ldn	[%l2], %i2			! %i2:%i3 = hrestime sec:nsec
    730 	ldn	[%l2 + CLONGSIZE], %i3
    731 	add	%i3, %l5, %i3			! hrestime.nsec += adj
    732 	add	%i3, %i1, %i3			! hrestime.nsec += nslt
    733 
    734 	set	NANOSEC, %l5			! %l5 = NANOSEC
    735 	cmp	%i3, %l5
    736 	bl,pt	%xcc, 5f			! if hrestime.tv_nsec < NANOSEC
    737 	sethi	%hi(one_sec), %i1		! delay
    738 	add	%i2, 0x1, %i2			! hrestime.tv_sec++
    739 	sub	%i3, %l5, %i3			! hrestime.tv_nsec - NANOSEC
    740 	mov	0x1, %l5
    741 	st	%l5, [%i1 + %lo(one_sec)]	! one_sec = 1
    742 5:
    743 	stn	%i2, [%l2]
    744 	stn	%i3, [%l2 + CLONGSIZE]		! store the new hrestime
    745 
    746 	membar	#StoreStore
    747 
    748 	ld	[%l4 + %lo(hres_lock)], %i1
    749 	inc	%i1				! release lock
    750 	st	%i1, [%l4 + %lo(hres_lock)]	! clear hres_lock
    751 
    752 	ret
    753 	restore
    754 
    755 9:
    756 	!
    757 	! release hres_lock
    758 	!
    759 	ld	[%l4 + %lo(hres_lock)], %i1
    760 	inc	%i1
    761 	st	%i1, [%l4 + %lo(hres_lock)]
    762 
    763 	sethi	%hi(hrtime_base_panic), %o0
    764 	call	panic
    765 	or	%o0, %lo(hrtime_base_panic), %o0	! delay: finish address
    766 
    767 	SET_SIZE(hres_tick)
    769 
    770 #endif	/* lint */
    771 
    772 #if !defined(lint) && !defined(__lint)
    773 
    773 	.seg	".text"
    774 kstat_q_panic_msg:
    775 	.asciz	"kstat_q_exit: qlen == 0"
    776 
        	! Branch target used by the kstat queue exit paths below when the
        	! old queue length is zero; calls panic() with the message above.
    777 	ENTRY(kstat_q_panic)
    778 	save	%sp, -SA(MINFRAME), %sp
    779 	sethi	%hi(kstat_q_panic_msg), %o0
    780 	call	panic
    781 	or	%o0, %lo(kstat_q_panic_msg), %o0	! delay: finish address
    782 	/*NOTREACHED*/
    783 	SET_SIZE(kstat_q_panic)
    785 
    785 #define	BRZPN	brz,pn
    786 #define	BRZPT	brz,pt
    787 
        /*
         * KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE)
         *
         * Update one queue of the structure addressed by %o0, using the
         * timestamp already in %g1.  QOP (add or sub) computes the new queue
         * length; QBR and QZERO give the branch taken when the old length was
         * zero (the new length is stored in that branch's delay slot either
         * way).  When the old length was non-zero, the elapsed time since
         * LASTUPDATE is added to TIME, and old length * elapsed is added to
         * LENTIME.  QRETURN is emitted just before the final store of
         * LASTUPDATE, so a "retl" passed there gets that store as its delay
         * slot.  QTYPE is the offset-name prefix pasted onto CNT, LASTUPDATE,
         * TIME and LENTIME.  Clobbers %o1-%o5.
         */
    788 #define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
    789 	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = old qlen */	\
    790 	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
    791 	QBR	%o1, QZERO;			/* done if qlen == 0 */	\
    792 	st	%o2, [%o0 + QTYPE/**/CNT];	/* delay: save qlen */	\
    793 	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3;			\
    794 	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = old time */	\
    795 	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = old lentime */	\
    796 	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
    797 	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
    798 	add	%o4, %o2, %o4;			/* %o4 = new time */	\
    799 	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
    800 	stx	%o4, [%o0 + QTYPE/**/TIME];	/* save time */		\
    801 	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* save lentime */	\
    802 QRETURN;								\
    803 	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
    805 
    805 	.align 16
    806 	ENTRY(kstat_waitq_enter)
    807 	GET_NATIVE_TIME(%g1, %g2, %g3)		! %g1 = now
    808 	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)	! wait qlen + 1
    809 	SET_SIZE(kstat_waitq_enter)
    811 
    811 	.align 16
    812 	ENTRY(kstat_waitq_exit)
    813 	GET_NATIVE_TIME(%g1, %g2, %g3)		! %g1 = now
    814 	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W) ! wait qlen - 1
    815 	SET_SIZE(kstat_waitq_exit)
    817 
    817 	.align 16
    818 	ENTRY(kstat_runq_enter)
    819 	GET_NATIVE_TIME(%g1, %g2, %g3)		! %g1 = now
    820 	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)	! run qlen + 1
    821 	SET_SIZE(kstat_runq_enter)
    823 
    823 	.align 16
    824 	ENTRY(kstat_runq_exit)
    825 	GET_NATIVE_TIME(%g1, %g2, %g3)		! %g1 = now
    826 	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R) ! run qlen - 1
    827 	SET_SIZE(kstat_runq_exit)
    829 
    829 	.align 16
    830 	ENTRY(kstat_waitq_to_runq)
    831 	GET_NATIVE_TIME(%g1, %g2, %g3)		! one timestamp for both queues
    832 	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W) ! wait qlen - 1
    833 	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)	! run qlen + 1
    834 	SET_SIZE(kstat_waitq_to_runq)
    836 
    836 	.align 16
    837 	ENTRY(kstat_runq_back_to_waitq)
    838 	GET_NATIVE_TIME(%g1, %g2, %g3)		! one timestamp for both queues
    839 	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R) ! run qlen - 1
    840 	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)	! wait qlen + 1
    841 	SET_SIZE(kstat_runq_back_to_waitq)
    843 
    844 #endif	/* !(lint || __lint) */
    845 
    845 #ifdef lint
    846 
    847 int64_t timedelta;
    848 hrtime_t hres_last_tick;
    849 volatile timestruc_t hrestime;
    850 int64_t hrestime_adj;
    851 volatile int hres_lock;
    852 uint_t nsec_scale;
    853 hrtime_t hrtime_base;
    854 int traptrace_use_stick;
    855 
    856 #else	/* lint */
    857 	/*
    858 	 *  -- WARNING --
    859 	 *
    860 	 * The following variables MUST be together on a 128-byte boundary.
    861 	 * In addition to the primary performance motivation (having them all
    862 	 * on the same cache line(s)), code here and in the GET*TIME() macros
    863 	 * assumes that they all have the same high 22 address bits (so
    864 	 * there's only one sethi).
        	 *
        	 * For example, hres_tick() in this file loads %hi(hrestime) once and
        	 * then addresses hres_lock, nsec_scale, hrtime_base, hres_last_tick,
        	 * hrestime_adj and timedelta with %lo() offsets from that register.
    865 	 */
    866 	.seg	".data"
    867 	.global	timedelta, hres_last_tick, hrestime, hrestime_adj
    868 	.global	hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
    869 	.global	nsec_shift, adj_shift
    870 
    871 	/* XXX - above comment claims 128-bytes is necessary, only 64 used */
    872 	.align	64
    873 timedelta:
    874 	.word	0, 0		/* int64_t */
    875 hres_last_tick:
    876 	.word	0, 0		/* hrtime_t */
    877 hrestime:
    878 	.nword	0, 0		/* 2 longs */
    879 hrestime_adj:
    880 	.word	0, 0		/* int64_t */
    881 hres_lock:
    882 	.word	0		/* int */
    883 nsec_scale:
    884 	.word	0		/* uint_t */
    885 hrtime_base:
    886 	.word	0, 0		/* hrtime_t */
    887 traptrace_use_stick:
    888 	.word	0		/* int */
    889 nsec_shift:
    890 	.word	NSEC_SHIFT	/* no lint declaration above */
    891 adj_shift:
    892 	.word	ADJ_SHIFT	/* no lint declaration above */
    893 
    894 #endif	/* lint */
    896 
    897 
    898 /*
    899  * drv_usecwait(clock_t n)	[DDI/DKI - section 9F]
    900  * usec_delay(int n)		[compatibility - should go one day]
    901  * Delay by spinning.
    902  *
    903  * delay for n microseconds.  numbers <= 0 delay 1 usec
    904  *
    905  * With UltraSPARC-III the combination of supporting mixed-speed CPUs
    906  * and variable clock rate for power management requires that we
    907  * use %stick to implement this routine.
    908  *
    909  * For OPL platforms that support the "sleep" instruction, we
    910  * conditionally (ifdef'ed) insert a "sleep" instruction in
    911  * the loop. Note that theoretically we should have moved (duplicated)
    912  * the code down to spitfire/us3/opl specific asm files - but this
    913  * is a lot of code duplication just to add one "sleep" instruction.
    914  * We chose less code duplication for this.
    915  */
    916 
    916 #if defined(lint)
    917 
    918 /*ARGSUSED*/
    919 void
    920 drv_usecwait(clock_t n)
    921 {}
    922 
    923 /*ARGSUSED*/
    924 void
    925 usec_delay(int n)
    926 {}
    927 
    928 #else	/* lint */
    929 
    930 	ENTRY(drv_usecwait)
    931 	ALTENTRY(usec_delay)		! second name for the same code
    932 	brlez,a,pn %o0, 0f		! n <= 0 ?
    933 	mov	1, %o0			! delay (only if taken): n = 1
    934 0:
    935 	sethi	%hi(sticks_per_usec), %o1
    936 	lduw	[%o1 + %lo(sticks_per_usec)], %o1
    937 	mulx	%o1, %o0, %o1		! Scale usec to ticks
    938 	inc	%o1			! We don't start on a tick edge
    939 	GET_NATIVE_TIME(%o2, %o3, %o4)	! %o2 = start time
    940 	add	%o1, %o2, %o1		! %o1 = end time
    941 
    942 1:
    943 #ifdef	_OPL
    944 	.word 0x81b01060		! insert "sleep" instruction
    945 #endif /* _OPL */			! use byte code for now
    946 	cmp	%o1, %o2		! end vs. the previous time sample
    947 	GET_NATIVE_TIME(%o2, %o3, %o4)	! new sample; must leave %xcc alone
    948 	bgeu,pt	%xcc, 1b		! loop while end >= previous sample
    949 	nop
    950 	retl
    951 	nop
    952 	SET_SIZE(usec_delay)
    953 	SET_SIZE(drv_usecwait)
    954 #endif	/* lint */
    956 
    957 #if defined(lint)
    958 
        /*
         * lint-only C stub for pil14_interrupt(); it has an empty body.  The
         * real implementation is the assembly routine in the !lint branch.
         */
    959 /* ARGSUSED */
    960 void
    961 pil14_interrupt(int level)
    962 {}
    963 
    964 #else	/* lint */
    965 
    966 /*
    967  * Level-14 interrupt prologue.
         *
         * Stores the interrupted PIL (%pil) at offset CPU_PROFILE_PIL from the
         * base address that CPU_ADDR leaves in %g1 (presumably this CPU's cpu
         * structure - confirm against the CPU_ADDR macro).  It then records the
         * trapped PC (%tpc) in exactly one of two slots and zeroes the other:
         *
         *   TSTATE_PRIV set   -> CPU_PROFILE_PC  = %tpc, CPU_PROFILE_UPC = 0
         *   TSTATE_PRIV clear -> CPU_PROFILE_UPC = %tpc, CPU_PROFILE_PC  = 0
         *
         * Both paths end by branching to pil_interrupt_common.
         *
         * Registers: %g1 = base address, %g2 = second CPU_ADDR argument
         * (presumably scratch), %g5 = %tpc, %g6 = %pil and then %tstate.
         * Every store after the btst sits in a branch delay slot; the one after
         * bnz,a is annulled and so executes only when the branch is taken.
    968  */
    969 	ENTRY_NP(pil14_interrupt)
    970 	CPU_ADDR(%g1, %g2)
    971 	rdpr	%pil, %g6			! %g6 = interrupted PIL
    972 	stn	%g6, [%g1 + CPU_PROFILE_PIL]	! record interrupted PIL
    973 	rdpr	%tstate, %g6
    974 	rdpr	%tpc, %g5
    975 	btst	TSTATE_PRIV, %g6		! trap from supervisor mode?
    976 	bnz,a,pt %xcc, 1f
    977 	stn	%g5, [%g1 + CPU_PROFILE_PC]	! if so, record kernel PC
    978 	stn	%g5, [%g1 + CPU_PROFILE_UPC]	! if not, record user PC
    979 	ba	pil_interrupt_common		! must be large-disp branch
    980 	stn	%g0, [%g1 + CPU_PROFILE_PC]	! zero kernel PC
    981 1:	ba	pil_interrupt_common		! must be large-disp branch
    982 	stn	%g0, [%g1 + CPU_PROFILE_UPC]	! zero user PC
    983 	SET_SIZE(pil14_interrupt)
    984 
        /*
         * tick_rtt
         *
         * If TICK_COMPARE is disabled (a non-zero value remains after shifting
         * it right by TICKINT_DIS_SHFT) this goes straight to
         * current_thread_complete.  Otherwise, with PSTATE_IE cleared, it:
         *
         *   1. calls intr_enqueue_req with %o0 = PIL_14 and
         *      %o1 = cbe_level14_inum;
         *   2. reads SOFTINT; if neither TICK_INT_MASK nor STICK_INT_MASK is
         *      set it restores %pstate and finishes;
         *   3. otherwise clears those SOFTINT bits and compares TICK_COMPARE
         *      with the current time; if TICK_COMPARE is not in the future it
         *      rewrites it as (current time + step), doubling the step from
         *      an initial 8 on every failed attempt.
         *
         * Every path ends at label 2, which branches to
         * current_thread_complete.  On each path that cleared PSTATE_IE, the
         * saved %pstate is written back in the annulled delay slot of the
         * branch to label 2.
         *
         * Registers: %o5 = TICK_COMPARE value, %g5 = %pstate saved before
         * PSTATE_IE is cleared, %o4 = SOFTINT contents and later the step
         * size, %o0 = interrupt mask and later the current time (top bit
         * cleared), %g1/%g2 = arguments to the time/compare macros
         * (presumably scratch - confirm against the macro definitions).
         */
    985 	ENTRY_NP(tick_rtt)
    986 	!
    987 	! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
    988 	! disabled.  If TICK_COMPARE is enabled, we know that we need to
    989 	! reenqueue the interrupt request structure.  We'll then check TICKINT
    990 	! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
    991 	! interrupt.  In this case, TICK_COMPARE may have been rewritten
    992 	! recently; we'll compare %o5 to the current time to verify that it's
    993 	! in the future.
    994 	!
    995 	! Note that %o5 is live until after 1f.
    996 	! XXX - there is a subroutine call while %o5 is live!
    997 	!
    998 	RD_TICKCMPR(%o5, %g1)
    999 	srlx	%o5, TICKINT_DIS_SHFT, %g1
   1000 	brnz,pt	%g1, 2f
   1001 	nop
   1002 
   1003 	rdpr 	%pstate, %g5
   1004 	andn	%g5, PSTATE_IE, %g1
   1005 	wrpr	%g0, %g1, %pstate		! Disable vec interrupts
   1006 
   1007 	sethi	%hi(cbe_level14_inum), %o1
   1008 	ldx	[%o1 + %lo(cbe_level14_inum)], %o1
   1009 	call	intr_enqueue_req ! preserves %o5 and %g5
   1010 	mov	PIL_14, %o0
   1011 
   1012 	! Check SOFTINT for TICKINT/STICKINT
   1013 	rd	SOFTINT, %o4
   1014 	set	(TICK_INT_MASK | STICK_INT_MASK), %o0
   1015 	andcc	%o4, %o0, %g0
   1016 	bz,a,pn	%icc, 2f
   1017 	wrpr	%g0, %g5, %pstate		! Enable vec interrupts
   1018 
   1019 	! clear TICKINT/STICKINT
   1020 	wr	%o0, CLEAR_SOFTINT
   1021 
   1022 	!
   1023 	! Now that we've cleared TICKINT, we can reread %tick and confirm
   1024 	! that the value we programmed is still in the future.  If it isn't,
   1025 	! we need to reprogram TICK_COMPARE to fire as soon as possible.
   1026 	!
   1027 	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
   1028 	sllx	%o0, 1, %o0			! Clear the DIS bit
   1029 	srlx	%o0, 1, %o0
   1030 	cmp	%o5, %o0			! In the future?
   1031 	bg,a,pt	%xcc, 2f			! Yes, drive on.
   1032 	wrpr	%g0, %g5, %pstate		!   delay: enable vec intr
   1033 
   1034 	!
   1035 	! If we're here, then we have programmed TICK_COMPARE with a %tick
   1036 	! which is in the past; we'll now load an initial step size, and loop
   1037 	! until we've managed to program TICK_COMPARE to fire in the future.
   1038 	!
   1039 	mov	8, %o4				! 8 = arbitrary inital step
   1040 1:	add	%o0, %o4, %o5			! Add the step
   1041 	WR_TICKCMPR(%o5,%g1,%g2,__LINE__)	! Write to TICK_CMPR
   1042 	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
   1043 	sllx	%o0, 1, %o0			! Clear the DIS bit
   1044 	srlx	%o0, 1, %o0
   1045 	cmp	%o5, %o0			! In the future?
   1046 	bg,a,pt	%xcc, 2f			! Yes, drive on.
   1047 	wrpr	%g0, %g5, %pstate		!    delay: enable vec intr
   1048 	ba	1b				! No, try again.
   1049 	sllx	%o4, 1, %o4			!    delay: double step size
   1050 
   1051 2:	ba	current_thread_complete
   1052 	nop
   1053 	SET_SIZE(tick_rtt)
   1054 
   1055 #endif	/* lint */
   1056 
   1057 #if defined(lint)
   1058 
        /*
         * lint-only C stub for pil15_interrupt(); it has an empty body.  The
         * real implementation is the assembly routine in the !lint branch.
         */
   1059 /* ARGSUSED */
   1060 void
   1061 pil15_interrupt(int level)
   1062 {}
   1063 
   1064 #else  /* lint */
   1065 
   1066 /*
   1067  * Level-15 interrupt prologue.
         *
         * Records the trapped PC (%tpc) in exactly one of two slots, addressed
         * relative to the base that CPU_ADDR leaves in %g1 (presumably this CPU's
         * cpu structure - confirm against the CPU_ADDR macro), and zeroes the
         * other:
         *
         *   TSTATE_PRIV set   -> CPU_CPCPROFILE_PC  = %tpc, CPU_CPCPROFILE_UPC = 0
         *   TSTATE_PRIV clear -> CPU_CPCPROFILE_UPC = %tpc, CPU_CPCPROFILE_PC  = 0
         *
         * Both paths end by branching to pil15_epilogue.
         *
         * Registers: %g1 = base address, %g2 = second CPU_ADDR argument
         * (presumably scratch), %g5 = %tpc, %g6 = %tstate.  Every store sits in
         * a branch delay slot; the one after bnz,a is annulled and so executes
         * only when the branch is taken.
   1068  */
   1069        ENTRY_NP(pil15_interrupt)
   1070        CPU_ADDR(%g1, %g2)
   1071        rdpr    %tstate, %g6
   1072        rdpr    %tpc, %g5
   1073        btst    TSTATE_PRIV, %g6                ! trap from supervisor mode?
   1074        bnz,a,pt %xcc, 1f
   1075        stn     %g5, [%g1 + CPU_CPCPROFILE_PC]  ! if so, record kernel PC
   1076        stn     %g5, [%g1 + CPU_CPCPROFILE_UPC] ! if not, record user PC
   1077        ba      pil15_epilogue                  ! must be large-disp branch
   1078        stn     %g0, [%g1 + CPU_CPCPROFILE_PC]  ! zero kernel PC
   1079 1:     ba      pil15_epilogue                  ! must be large-disp branch
   1080        stn     %g0, [%g1 + CPU_CPCPROFILE_UPC] ! zero user PC
   1081        SET_SIZE(pil15_interrupt)
   1082 
   1083 #endif /* lint */
   1084 
   1085 #if defined(lint) || defined(__lint)
   1086 
        /*
         * lint-only C stub for find_cpufrequency(); it always returns 0.  The
         * real implementation is the assembly routine in the !lint branch.
         */
   1087 /* ARGSUSED */
   1088 uint64_t
   1089 find_cpufrequency(volatile uchar_t *clock_ptr)
   1090 {
   1091 	return (0);
   1092 }
   1093 
   1094 #else	/* lint */
   1095 
   1096 #ifdef DEBUG
        /*
         * NUL-terminated panic message, assembled only in DEBUG builds and
         * placed in the ".text" segment.  find_cpufrequency passes its address
         * to panic() when PSTATE_IE is found clear on entry.
         */
   1097 	.seg	".text"
   1098 find_cpufreq_panic:
   1099 	.asciz	"find_cpufrequency: interrupts already disabled on entry"
   1100 #endif	/* DEBUG */
   1101 
        /*
         * find_cpufrequency(clock_ptr)
         *
         * In:   %o0 = address of a byte that is polled as the seconds register.
         * Out:  %o0 = %o5 - %o3, i.e. the native time sampled when the byte
         *       changed for the second time minus the native time sampled when
         *       it changed for the first time.
         *
         * %g1 holds %pstate as read on entry.  "wrpr %g1, PSTATE_IE, %pstate"
         * writes %g1 XOR PSTATE_IE, which clears the IE bit provided it was set
         * on entry (DEBUG builds panic if it was not).  The entry value is
         * written back in the delay slot of the final brz, before returning or
         * retrying.
         *
         * Each polling loop samples the time, then re-reads the byte in the
         * branch delay slot, and exits once the value differs from the previous
         * one.  If the newly read value is zero the measurement is retried, for
         * the minute-rollover reason given in the comments below: the first
         * check restarts at label 3, the second restarts at label 0.
         *
         * In the DEBUG check the sethi sits in the (non-annulled) delay slot of
         * bnz, so it executes on both paths; %o1 is reloaded at label 3.
         *
         * %g4 and %g5 are handed to GET_NATIVE_TIME, presumably as scratch
         * registers - confirm against the macro definition.
         */
   1102 	ENTRY_NP(find_cpufrequency)
   1103 	rdpr	%pstate, %g1
   1104 
   1105 #ifdef DEBUG
   1106 	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
   1107 	bnz	0f			! are currently enabled
   1108 	sethi	%hi(find_cpufreq_panic), %o1
   1109 	call	panic
   1110 	or	%o1, %lo(find_cpufreq_panic), %o0
   1111 #endif	/* DEBUG */
   1112 
   1113 0:
   1114 	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
   1115 3:
   1116 	ldub	[%o0], %o1		! Read the number of seconds
   1117 	mov	%o1, %o2		! remember initial value in %o2
   1118 1:
   1119 	GET_NATIVE_TIME(%o3, %g4, %g5)
   1120 	cmp	%o1, %o2		! did the seconds register roll over?
   1121 	be,pt	%icc, 1b		! branch back if unchanged
   1122 	ldub	[%o0], %o2		!   delay: load the new seconds val
   1123 
   1124 	brz,pn	%o2, 3b			! if the minutes just rolled over,
   1125 					! the last second could have been
   1126 					! inaccurate; try again.
   1127 	mov	%o2, %o4		!   delay: store init. val. in %o2
        /*
         * NOTE: the mov above copies %o2 into %o4; %o4 is the register that the
         * second polling loop compares against and reloads.
         */
   1128 2:
   1129 	GET_NATIVE_TIME(%o5, %g4, %g5)
   1130 	cmp	%o2, %o4		! did the seconds register roll over?
   1131 	be,pt	%icc, 2b		! branch back if unchanged
   1132 	ldub	[%o0], %o4		!   delay: load the new seconds val
   1133 
   1134 	brz,pn	%o4, 0b			! if the minutes just rolled over,
   1135 					! the last second could have been
   1136 					! inaccurate; try again.
   1137 	wrpr	%g0, %g1, %pstate	!   delay: re-enable interrupts
   1138 
   1139 	retl
   1140 	sub	%o5, %o3, %o0		! return the difference in ticks
   1141 	SET_SIZE(find_cpufrequency)
   1142 
   1143 #endif	/* lint */
   1144 
   1145 #if defined(lint)
   1146 /*
   1147  * Prefetch a page_t for write or read; this assumes a linear
   1148  * scan of sequential page_t's.
         *
         * The two functions here are lint-only C stubs with empty bodies; the
         * real routines are the per-CPU-type assembly versions in the !lint
         * branch.
   1149  */
   1150 /*ARGSUSED*/
   1151 void
   1152 prefetch_page_w(void *pp)
   1153 {}
   1154 
        /*
         * lint-only C stub for prefetch_page_r(); it has an empty body.  The
         * real routine is the per-CPU-type assembly version in the !lint
         * branch.
         */
   1155 /*ARGSUSED*/
   1156 void
   1157 prefetch_page_r(void *pp)
   1158 {}
   1159 #else	/* lint */
   1160 
   1161 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
   1162 	defined(SERRANO)
   1163 	!
   1164 	! On US-III, the prefetch instruction queue is 8 entries deep.
   1165 	! Also, prefetches for write put data in the E$, which has
   1166 	! lines of 512 bytes for an 8MB cache. Each E$ line is further
   1167 	! subblocked into 64 byte chunks.
   1168 	!
   1169 	! Since prefetch can only bring in 64 bytes at a time (See Sparc
   1170 	! v9 Architecture Manual pp.204) and a page_t is 128 bytes,
   1171 	! then 2 prefetches are required in order to bring an entire
   1172 	! page into the E$.
   1173 	!
   1174 	! Since the prefetch queue is 8 entries deep, we currently can
   1175 	! only have 4 prefetches for page_t's outstanding. Thus, we
   1176 	! prefetch n+4 ahead of where we are now:
   1177 	!
   1178 	!      4 * sizeof(page_t)     -> 512
   1179 	!      4 * sizeof(page_t) +64 -> 576
   1180 	!
   1181 	! Example
   1182 	! =======
   1183 	! contiguous page array in memory...
   1184 	!
   1185 	! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|...
   1186 	! ^         ^         ^         ^         ^    ^
   1187 	! pp                                      |    pp+4*sizeof(page)+64
   1188 	!                                         |
   1189 	!                                         pp+4*sizeof(page)
   1190 	!
   1191 	!  Prefetch
   1192 	!   Queue
   1193 	! +-------+<--- In this iteration, we're working with pp (AAA1),
   1194 	! |Preftch|     but we enqueue prefetch for addr = XXX1
   1195 	! | XXX1  |
   1196 	! +-------+<--- this queue slot will be a prefetch instruction for
   1197 	! |Preftch|     addr = pp + 4*sizeof(page_t) + 64 (or second
   1198 	! | XXX2  |     half of page XXX)
   1199 	! +-------+
   1200 	! |Preftch|<-+- The next time around this function, we'll be
   1201 	! | YYY1  |  |  working with pp = BBB1, but will be enqueueing
   1202 	! +-------+  |  prefetches for both halves of page YYY,
   1203 	! |Preftch|  |  while both halves of page XXX are in transit,
   1204 	! | YYY2  |<-+  making their way into the E$.
   1205 	! +-------+
   1206 	! |Preftch|
   1207 	! | ZZZ1  |
   1208 	! +-------+
   1209 	! .       .
   1210 	! :       :
   1211 	!
   1212 	!  E$
   1213 	! +============================================...
   1214 	! | XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 |
   1215 	! +============================================...
   1216 	! |      |      |      |      |      |      |
   1217 	! +============================================...
   1218 	! .
   1219 	! :
   1220 	!
   1221 	! So we should expect the first four page accesses to stall
   1222 	! while we warm up the cache, after which most of the pages
   1223 	! will have their pp ready in the E$.
   1224 	!
   1225 	! Also note that if sizeof(page_t) grows beyond 128, then
   1226 	! we'll need an additional prefetch to get an entire page
   1227 	! into the E$, thus reducing the number of outstanding page
   1228 	! prefetches to 2 (ie. 3 prefetches/page = 6 queue slots)
   1229 	! etc.
   1230 	!
   1231 	! Cheetah+
   1232 	! ========
   1233 	! On Cheetah+ we use "#n_write" prefetches as these avoid
   1234 	! unnecessary RTS->RTO bus transaction state change, and
   1235 	! just issues RTO transaction. (See pp.77 of Cheetah+ Delta
   1236 	! PRM). On Cheetah, #n_write prefetches are reflected with
   1237 	! RTS->RTO state transition regardless.
   1238 	!
   1239 #define STRIDE1 512
   1240 #define STRIDE2 576
   1241 
        /*
         * STRIDE1/STRIDE2 are the byte offsets from pp that are prefetched;
         * STRIDE2 is STRIDE1 + 64.  The check below fails the build if STRIDE1
         * no longer equals PAGE_SIZE * 4.
         */
   1242 #if	STRIDE1 != (PAGE_SIZE * 4)
   1243 #error	"STRIDE1 != (PAGE_SIZE * 4)"
   1244 #endif	/* STRIDE1 != (PAGE_SIZE * 4) */
   1245 
        /*
         * prefetch_page_w(pp): %o0 = pp.  Issues a #n_writes prefetch for
         * pp + STRIDE1 and, in the retl delay slot, one for pp + STRIDE2.
         */
   1246         ENTRY(prefetch_page_w)
   1247         prefetch        [%o0+STRIDE1], #n_writes
   1248         retl
   1249         prefetch        [%o0+STRIDE2], #n_writes
   1250         SET_SIZE(prefetch_page_w)
   1251 
   1252 	!
   1253 	! Note on CHEETAH to prefetch for read, we really use #one_write.
   1254 	! This fetches to E$ (general use) rather than P$ (floating point use).
   1255 	!
        /*
         * prefetch_page_r(pp): %o0 = pp.  Issues a #one_write prefetch for
         * pp + STRIDE1 and, in the retl delay slot, one for pp + STRIDE2.
         */
   1256         ENTRY(prefetch_page_r)
   1257         prefetch        [%o0+STRIDE1], #one_write
   1258         retl
   1259         prefetch        [%o0+STRIDE2], #one_write
   1260         SET_SIZE(prefetch_page_r)
   1261 
   1262 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
   1263 
   1264 	!
   1265 	! UltraSparcII can have up to 3 prefetches outstanding.
   1266 	! A page_t is 128 bytes (2 prefetches of 64 bytes each)
   1267 	! So prefetch for pp + 1, which is
   1268 	!
   1269 	!       pp + sizeof(page_t)
   1270 	! and
   1271 	!       pp + sizeof(page_t) + 64
   1272 	!
   1273 #define STRIDE1	128
   1274 #define STRIDE2	192
   1275 
        /*
         * STRIDE1/STRIDE2 are the byte offsets from pp that are prefetched;
         * STRIDE2 is STRIDE1 + 64.  The check below fails the build if STRIDE1
         * no longer equals PAGE_SIZE.
         */
   1276 #if	STRIDE1 != PAGE_SIZE
   1277 #error	"STRIDE1 != PAGE_SIZE"
   1278 #endif	/* STRIDE1 != PAGE_SIZE */
   1279 
        /*
         * prefetch_page_w(pp): %o0 = pp.  Issues a #n_writes prefetch for
         * pp + STRIDE1 and, in the retl delay slot, one for pp + STRIDE2.
         */
   1280         ENTRY(prefetch_page_w)
   1281         prefetch        [%o0+STRIDE1], #n_writes
   1282         retl
   1283         prefetch        [%o0+STRIDE2], #n_writes
   1284         SET_SIZE(prefetch_page_w)
   1285 
        /*
         * prefetch_page_r(pp): %o0 = pp.  Issues a #n_reads prefetch for
         * pp + STRIDE1 and, in the retl delay slot, one for pp + STRIDE2.
         */
   1286         ENTRY(prefetch_page_r)
   1287         prefetch        [%o0+STRIDE1], #n_reads
   1288         retl
   1289         prefetch        [%o0+STRIDE2], #n_reads
   1290         SET_SIZE(prefetch_page_r)
   1291 
   1292 #elif defined(OLYMPUS_C)
   1293 	!
   1294 	! Prefetch strides for Olympus-C
   1295 	!
   1296 
   1297 #define STRIDE1	0x440
   1298 #define STRIDE2	0x640
   1299 
        /*
         * prefetch_page_w(pp): %o0 = pp.  Issues a #n_writes prefetch for
         * pp + STRIDE1 and, in the retl delay slot, one for pp + STRIDE2.
         * Unlike the other CPU branches, this one has no build-time check tying
         * STRIDE1 to PAGE_SIZE, and STRIDE2 is STRIDE1 + 0x200 rather than + 64.
         */
   1300 	ENTRY(prefetch_page_w)
   1301         prefetch        [%o0+STRIDE1], #n_writes
   1302 	retl
   1303         prefetch        [%o0+STRIDE2], #n_writes
   1304 	SET_SIZE(prefetch_page_w)
   1305 
        /*
         * prefetch_page_r(pp): %o0 = pp.  Issues a prefetch for pp + STRIDE1
         * and, in the retl delay slot, one for pp + STRIDE2.
         * NOTE(review): the read variant uses #n_writes here, making it
         * identical to prefetch_page_w, whereas the other CPU branches use a
         * different prefetch variant for reads - confirm this is intentional.
         */
   1306 	ENTRY(prefetch_page_r)
   1307         prefetch        [%o0+STRIDE1], #n_writes
   1308 	retl
   1309         prefetch        [%o0+STRIDE2], #n_writes
   1310 	SET_SIZE(prefetch_page_r)
   1311 #else	/* OLYMPUS_C */
   1312 
   1313 #error "You need to fix this for your new cpu type."
   1314 
   1315 #endif	/* OLYMPUS_C */
   1316 
   1317 #endif	/* lint */
   1318 
   1319 #if defined(lint)
   1320 /*
   1321  * Prefetch struct smap for write.
         *
         * lint-only C stub with an empty body; the real routine is the assembly
         * version in the !lint branch.
   1322  */
   1323 /*ARGSUSED*/
   1324 void
   1325 prefetch_smap_w(void *smp)
   1326 {}
   1327 #else	/* lint */
   1328 
        /*
         * PREFETCH_Q_LEN: per-CPU-type constant from which SMAP_STRIDE is
         * computed:
         *
         *   CHEETAH, CHEETAH_PLUS, JALAPENO, SERRANO -> 8
         *   SPITFIRE, HUMMINGBIRD                    -> 3
         *   OLYMPUS_C                                -> 1
         *
         * Building for any other CPU type stops with #error.
         */
   1329 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
   1330 	defined(SERRANO)
   1331 
   1332 #define	PREFETCH_Q_LEN 8
   1333 
   1334 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
   1335 
   1336 #define	PREFETCH_Q_LEN 3
   1337 
   1338 #elif defined(OLYMPUS_C)
   1339 	!
   1340 	! Use length of one for now.
   1341 	!
   1342 #define	PREFETCH_Q_LEN	1
   1343 
   1344 #else 	/* OLYMPUS_C */
   1345 
   1346 #error You need to fix this for your new cpu type.
   1347 
   1348 #endif	/* OLYMPUS_C */
   1349 
   1350 #include <vm/kpm.h>
   1351 
        /*
         * SMAP_SIZE / SMAP_STRIDE: SMAP_STRIDE is the byte distance that
         * prefetch_smap_w subtracts from its smap pointer.
         *
         *   SEGKPM_SUPPORT defined: SMAP_SIZE = 72,
         *       SMAP_STRIDE = ((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64
         *       (integer division, so the result is a multiple of 64).
         *   otherwise: SMAP_SIZE = 48,
         *       SMAP_STRIDE = PREFETCH_Q_LEN * SMAP_SIZE.
         *
         * NOTE(review): SMAP_SIZE is a hard-coded number here; nothing in this
         * block checks it against the actual size of struct smap.
         */
   1352 #ifdef	SEGKPM_SUPPORT
   1353 
   1354 #define	SMAP_SIZE 72
   1355 #define SMAP_STRIDE (((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)
   1356 
   1357 #else	/* SEGKPM_SUPPORT */
   1358 
   1359 	!
   1360 	! The hardware will prefetch the 64 byte cache aligned block
   1361 	! that contains the address specified in the prefetch instruction.
   1362 	! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
   1363 	! per pass will suffice as long as we prefetch far enough ahead to
   1364 	! make sure we don't stall for the cases where the smap object
   1365 	! spans multiple hardware prefetch blocks.  Let's prefetch as far
   1366 	! ahead as the hardware will allow.
   1367 	!
   1368 	! The smap array is processed with decreasing address pointers.
   1369 	!
   1370 #define	SMAP_SIZE 48
   1371 #define	SMAP_STRIDE (PREFETCH_Q_LEN * SMAP_SIZE)
   1372 
   1373 #endif	/* SEGKPM_SUPPORT */
   1374 
        /*
         * prefetch_smap_w(smp): %o0 = smp.  Issues one #n_writes prefetch for
         * the address smp - SMAP_STRIDE; the prefetch sits in the retl delay
         * slot.
         */
   1375 	ENTRY(prefetch_smap_w)
   1376 	retl
   1377 	prefetch	[%o0-SMAP_STRIDE], #n_writes
   1378 	SET_SIZE(prefetch_smap_w)
   1379 
   1380 #endif	/* lint */
   1381 
   1382 #if defined(lint) || defined(__lint)
   1383 
        /*
         * lint-only C stub for getidsr(); it always returns 0.  The real
         * implementation is the assembly routine in the !lint branch.
         */
   1384 /* ARGSUSED */
   1385 uint64_t
   1386 getidsr(void)
   1387 { return 0; }
   1388 
   1389 #else	/* lint */
   1390 
        /*
         * getidsr(): returns in %o0 the 64-bit value loaded from address 0
         * (%g0) in address space ASI_INTR_DISPATCH_STATUS.  The load sits in
         * the retl delay slot.
         */
   1391 	ENTRY_NP(getidsr)
   1392 	retl
   1393 	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %o0
   1394 	SET_SIZE(getidsr)
   1395 
   1396 #endif	/* lint */
   1397