Home | History | Annotate | Download | only in sys
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #ifndef _SYS_CLOCK_H
     27 #define	_SYS_CLOCK_H
     28 
     29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     30 
     31 #ifdef	__cplusplus
     32 extern "C" {
     33 #endif
     34 
     35 #include <sys/spl.h>
     36 #include <sys/time.h>
     37 #include <sys/machclock.h>
     38 
     39 #ifndef _ASM
     40 
     41 #ifdef	_KERNEL
     42 
     43 extern void	setcpudelay(void);
     44 
        /*
         * Tick-to-nanosecond conversion state.  nsec_scale is the fixed-point
         * factor (R * 2^32) loaded by the NATIVE_TIME_TO_NSEC, GET_HRESTIME
         * and GET_HRTIME macros; sys_tick_freq is the frequency read by
         * GET_VTRACE_FREQUENCY().
         * NOTE(review): nsec_shift and nsec_per_sys_tick are not used by any
         * macro in this header; presumably they are set up together with
         * nsec_scale -- confirm against the code that initializes them.
         */
     45 extern uint_t	nsec_scale;
     46 extern uint_t	nsec_shift;
     47 extern uint_t	nsec_per_sys_tick;
     48 extern uint64_t	sys_tick_freq;
     49 
        /*
         * NOTE(review): judging by their names these describe the selected
         * clock source and its frequency; none of them is referenced by the
         * macros in this header.
         */
     50 extern int	traptrace_use_stick;
     51 extern uint64_t	system_clock_freq;
     52 extern uint_t	sys_clock_mhz;
     53 
        /* mon_clock_* entry points; only declared here, implemented elsewhere. */
     54 extern void mon_clock_init(void);
     55 extern void mon_clock_start(void);
     56 extern void mon_clock_stop(void);
     57 extern void mon_clock_share(void);
     58 extern void mon_clock_unshare(void);
     59 
        /*
         * High-resolution time support.  hrtime_base is the value to which
         * GET_HRTIME() adds the nanoseconds elapsed since the last clock tick;
         * hres_tick() is the high-level cyclic handler that acquires hres_lock.
         * NOTE(review): cbe_level14() is declared with an empty parameter list,
         * i.e. without a prototype; change it to (void) once its definition is
         * confirmed to take no arguments.
         * NOTE(review): tick2ns() presumably converts a tick count to
         * nanoseconds; the meaning of its uint_t argument is not visible here.
         */
     60 extern hrtime_t hrtime_base;
     61 extern void hres_tick(void);
     62 extern void	clkstart(void);
     63 extern void cbe_level14();
     64 extern hrtime_t tick2ns(hrtime_t, uint_t);
     65 
     66 typedef struct {
        	/*
        	 * NOTE(review): presumably the interrupt numbers registered for
        	 * the level-1 and level-10 cyclic handlers -- confirm with the
        	 * code that fills this structure in.
        	 */
     67 	uint64_t cbe_level1_inum;
     68 	uint64_t cbe_level10_inum;
     69 } cbe_data_t;
     70 
     71 #endif	/* _KERNEL */
     72 
     73 #endif	/* _ASM */
     74 
     75 
     76 #define	CBE_LOW_PIL	1	/* NOTE(review): presumably the PIL of low-level cyclics */
     77 #define	CBE_LOCK_PIL	LOCK_LEVEL	/* LOCK_LEVEL is not defined in this header */
     78 #define	CBE_HIGH_PIL	14	/* level CLOCK_LOCK() raises to; hres_tick() runs here */
     79 
     80 #define	ADJ_SHIFT	4	/* used in get_hrestime and _level10 */
     81 
     82 /*
     83  * Locking strategy for high-resolution timing services
     84  *
     85  * We generally construct timestamps from two or more components:
     86  * a hardware time source and one or more software time sources.
     87  * These components cannot all be loaded simultaneously, so we need
     88  * some sort of locking strategy to generate consistent timestamps.
     89  *
     90  * To minimize lock contention and cache thrashing we employ the
     91  * weakest possible synchronization model: writers (rare) serialize
     92  * on an acquisition-counting mutex, described below; readers (common)
     93  * execute in parallel with no synchronization at all -- they don't
     94  * exclude other readers, and they don't even exclude writers.  Instead,
     95  * readers just examine the writer lock's value before and after loading
     96  * all the components of a timestamp to detect writer intervention.
     97  * In the rare case when a writer does intervene, the reader will
     98  * detect it, discard the timestamp and try again.
     99  *
    100  * The writer lock, hres_lock, is a 32-bit integer consisting of an
    101  * 8-bit lock and a 24-bit acquisition count.  To acquire the lock we
    102  * set the lock field with ldstub, which sets the low-order 8 bits to
    103  * 0xff; to clear the lock, we increment it, which simultaneously clears
    104  * the lock field (0xff --> 0x00) and increments the acquisition count
    105  * (due to carry into bit 8).  Thus each acquisition transforms hres_lock
    106  * from N:0 to N:ff, and each release transforms N:ff into (N+1):0.
    107  *
    108  * Readers can detect writer intervention by loading hres_lock before
    109  * and after loading the time components they need; if either lock value
    110  * contains 0xff in the low-order bits (lock held), or if the lock values
    111  * are not equal (lock was acquired and released), a writer intervened
    112  * and the reader must try again.  If the lock values are equal and the
    113  * low-order 8 bits are clear, the timestamp must be valid.  We can check
    114  * both of these conditions with a single compare instruction by checking
    115  * whether (old_hres_lock & ~1) == new_hres_lock, as illustrated by the
    116  * following table of all possible lock states:
    117  *
    118  *	initial	& ~1	final		result of compare
    119  *	------------	-----		-----------------
    120  *	now:00		now:00		valid
    121  *	now:00		now:ff		invalid
    122  *	now:00		later:00	invalid
    123  *	now:00		later:ff	invalid
    124  *	now:fe		now:ff		invalid
    125  *	now:fe		later:00	invalid
    126  *	now:fe		later:ff	invalid
    127  *
    128  * Implementation considerations:
    129  *
    130  * (1) Load buffering.
    131  *
    132  * On a CPU that does load buffering we must ensure that the load of
    133  * hres_lock completes before the load of any timestamp components.
    134  * This is essential *even on a CPU that does in-order loads* because
    135  * accessing the hardware time source may not involve a memory reference
    136  * (e.g. rd %tick).  A convenient way to address this is to clear the
    137  * lower bit (andn with 1) of the old lock value right away, since this
    138  * generates a dependency on the load of hres_lock.  We have to do this
    139  * anyway to perform the lock comparison described above.
    140  *
    141  * (2) Out-of-order loads.
    142  *
    143  * On a CPU that does out-of-order loads we must ensure that the loads
    144  * of all timestamp components have completed before we load the final
    145  * value of hres_lock.  This can be done either by generating load
    146  * dependencies on the timestamp components or by membar #LoadLoad.
    147  *
    148  * (3) Interaction with the high level cyclic handler, hres_tick().
    149  *
    150  * One unusual property of hres_lock is that it's acquired in a high
    151  * level cyclic handler, hres_tick().  Thus, hres_lock must be acquired at
    152  * CBE_HIGH_PIL or higher to prevent single-CPU deadlock.
    153  *
    154  * (4) Cross-calls.
    155  *
    156  * If a cross-call happens while one CPU has hres_lock and another is
    157  * trying to acquire it in the clock interrupt path, the system will
    158  * deadlock: the first CPU will never release hres_lock since it's
    159  * waiting to be released from the cross-call, and the cross-call can't
    160  * complete because the second CPU is spinning on hres_lock with traps
    161  * disabled.  Thus cross-calls must be blocked while holding hres_lock.
    162  *
    163  * Together, (3) and (4) imply that hres_lock should only be acquired
    164  * at PIL >= max(XCALL_PIL, CBE_HIGH_PIL), or while traps are disabled.
    165  */
    166 #define	HRES_LOCK_OFFSET 3	/* index of the lock byte within hres_lock */
    167 
        /*
         * CLOCK_LOCK(oldsplp) takes the writer side of hres_lock.
         *
         * It calls lock_set_spl() on the lock_t found HRES_LOCK_OFFSET elements
         * past the start of hres_lock, with the spl value for CBE_HIGH_PIL, and
         * passes oldsplp through unchanged.
         * NOTE(review): offset 3 selects the low-order byte (the 8-bit lock
         * field described in the locking-strategy comment) provided lock_t is
         * one byte and the word is stored big-endian -- confirm.
         * NOTE(review): oldsplp presumably receives the previous spl so that it
         * can later be handed to CLOCK_UNLOCK() -- confirm against
         * lock_set_spl().
         *
         * The macro expands to a single call expression with no trailing
         * semicolon.
         */
    168 #define	CLOCK_LOCK(oldsplp)	\
    169 	lock_set_spl((lock_t *)&hres_lock + HRES_LOCK_OFFSET, \
    170 		ipltospl(CBE_HIGH_PIL), oldsplp)
    171 
    /*
     * CLOCK_UNLOCK(spl) releases the writer side of hres_lock.
     *
     * Steps, in order:
     *   1. membar_ldst_stst() is issued before the releasing store.
     *      NOTE(review): presumably this orders the writer's earlier loads
     *      and stores ahead of the release -- confirm against its definition.
     *   2. hres_lock++ turns the lock byte from 0xff into 0x00 and carries
     *      into the acquisition count, i.e. N:ff becomes (N+1):0.
     *   3. splx(spl) is called with the value the caller passes in.
     *   4. LOCKSTAT_RECORD0() is invoked with LS_CLOCK_UNLOCK_RELEASE and
     *      the address of the lock byte.
     *
     * The expansion is wrapped in do { } while (0) and carries no trailing
     * semicolon, so "CLOCK_UNLOCK(s);" is exactly one statement.  A bare
     * statement list breaks apart under an unbraced if/else: only the
     * barrier would be guarded, and a following "else" would not parse.
     */
    #define	CLOCK_UNLOCK(spl)					\
    	do {							\
    		membar_ldst_stst();				\
    		hres_lock++;					\
    		splx(spl);					\
    		LOCKSTAT_RECORD0(LS_CLOCK_UNLOCK_RELEASE,	\
    		    (lock_t *)&hres_lock + HRES_LOCK_OFFSET);	\
    	} while (0)
    178 
    179 /*
    180  * NATIVE_TIME_TO_NSEC_SCALE is called with NSEC_SHIFT to convert hi-res
    181  * timestamps into nanoseconds. On systems that have a %stick register,
    182  * hi-res timestamps are in %stick units. On systems that do not have a
    183  * %stick register, hi-res timestamps are in %tick units.
    184  *
    185  * NATIVE_TIME_TO_NSEC_SCALE is called with TICK_NSEC_SHIFT to convert from
    186  * %tick units to nanoseconds on all implementations whether %stick is
    187  * available or not.
    188  */
    189 
    190 /*
    191  * At least 62.5 MHz CPU %tick frequency
    192  */
    193 
    194 #define	TICK_NSEC_SHIFT	4	/* 2^4 = 16: %tick period assumed <= 16 ns (>= 62.5 MHz) */
    195 
    196 /*
    197  * Convert hi-res native time (V9's %tick in our case) into nanoseconds.
    198  *
    199  * The challenge is to multiply a %tick value by (NANOSEC / sys_tick_freq)
    200  * without using floating point and without overflowing 64-bit integers.
    201  * We assume that all sun4u systems will have a 16 nsec or better clock
    202  * (i.e. faster than 62.5 MHz), which means that (ticks << 4) is measured
    203  * in units no larger than one nanosecond, so converting from (ticks << 4)
    204  * to nsec requires multiplication by a rational number, R, between 0 and 1.
    205  * To avoid floating-point we precompute (R * 2^32) during boot and
    206  * stash this away in nsec_scale.  Thus we can compute (tick * R) as
    207  * (tick * nsec_scale) >> 32, which is accurate to about 1 part per billion.
    208  *
    209  * To avoid 64-bit overflow when multiplying (tick << 4) by nsec_scale,
    210  * we split (tick << 4) into its high and low 32-bit pieces, H and L,
    211  * multiply each piece separately, and add up the relevant bits of the
    212  * partial products.  Putting it all together we have:
    213  *
    214  * nsec = (tick << 4) * R
    215  *	= ((tick << 4) * nsec_scale) >> 32
    216  *	= (((H << 32) + L) * nsec_scale) >> 32
    217  *	= (H * nsec_scale) + ((L * nsec_scale) >> 32)
    218  *
    219  * The last line is the computation we actually perform: it requires no
    220  * floating point and all intermediate results fit in 64-bit registers.
    221  *
    222  * Note that we require that tick is less than (1 << (64 - NSEC_SHIFT));
    223  * greater values will result in overflow and misbehavior (not that this
    224  * is a serious problem; (1 << (64 - NSEC_SHIFT)) nanoseconds is over
    225  * thirty-six years).  Nonetheless, clients may wish to be aware of this
    226  * limitation; NATIVE_TIME_MAX() returns this maximum native time.
    227  *
    228  * We provide two versions of this macro: a "full-service" version that
    229  * just converts ticks to nanoseconds and a higher-performance version that
    230  * expects the scaling factor nsec_scale as its second argument (so that
    231  * callers can distance the load of nsec_scale from its use).  Note that
    232  * we take a fast path if we determine the ticks to be less than 32 bits
    233  * (as it often is for the delta between %tick values for successive
    234  * firings of the hres_tick() cyclic).
    235  *
    236  * Note that in the 32-bit path we don't even bother clearing NPT.
    237  * We get away with this by making hardclk.c ensure that nsec_scale
    238  * is even, so we can take advantage of the associativity of modular
    239  * arithmetic: multiplying %tick by any even number, say 2*n, is
    240  * equivalent to multiplying %tick by 2, then by n.  Multiplication
    241  * by 2 is equivalent to shifting left by one, which clears NPT.
    242  *
    243  * Finally, note that the macros use the labels "6:" and "7:"; these
    244  * labels must not be used across an invocation of either macro.
    245  */
    246 #define	NATIVE_TIME_TO_NSEC_SCALE(out, scr1, scr2, shift)		\
    247 	srlx	out, 32, scr2;		/* scr2 = high 32 bits of out (ticks) */ \
    248 /* CSTYLED */ 								\
    249 	brz,a,pt scr2, 6f;		/* if clear, 32-bit fast path */\
    250 	mulx	out, scr1, out;		/* delay, fast path only: out *= F (scr1) */ \
    251 	sllx	out, shift, out;	/* clear NPT and pre-scale */	\
    252 	srlx	out, 32, scr2;		/* scr2 = hi32(tick<<shift) = H */ \
    253 	mulx	scr2, scr1, scr2;	/* scr2 = (H*F) */		\
    254 	srl	out, 0, out;		/* out = lo32(tick<<shift) = L */ \
    255 	mulx	out, scr1, scr1;	/* scr1 = (L*F) */		\
    256 	srlx	scr1, 32, scr1;		/* scr1 = (L*F) >> 32 */	\
    257 	ba	7f;			/* branch over 32-bit path */	\
    258 	add	scr1, scr2, out;	/* out = (H*F) + ((L*F) >> 32) */\
    259 6:					/* 32-bit fast path continues here */ \
    260 	srlx	out, 32 - shift, out;	/* out = (tick*F) >> (32 - shift) */ \
    261 7:					/* out = nanoseconds; scr1, scr2 are scratch */
    262 
    263 #define	NATIVE_TIME_TO_NSEC(out, scr1, scr2)				\
    264 	sethi	%hi(nsec_scale), scr1;	/* load scaling factor */	\
    265 	ld	[scr1 + %lo(nsec_scale)], scr1;	/* scr1 = nsec_scale */	\
    266 	NATIVE_TIME_TO_NSEC_SCALE(out, scr1, scr2, NSEC_SHIFT);	/* out = ns; uses labels 6 and 7 */
    267 
    268 #define	NATIVE_TIME_MAX(out)						\
    269 	mov	-1, out;		/* out = all ones */		\
    270 	srlx	out, NSEC_SHIFT, out	/* out = 2^(64 - NSEC_SHIFT) - 1; no trailing ';' */
    271 
    272 
    273 /*
    274  * The following macros are only for use in the cpu module.
    275  */
    276 #if defined(CPU_MODULE)
    277 
    278 /*
    279  * NSEC_SHIFT and VTRACE_SHIFT constants are defined in
    280  * <sys/machclock.h> file.
    281  */
    282 
    283 
    284 /*
    285  * NOTE: the macros below assume that the various time-related variables
    286  * (hrestime, hrestime_adj, hres_last_tick, timedelta, nsec_scale, etc)
    287  * are all stored together on a 64-byte boundary.  The primary motivation
    288  * is cache performance, but we also take advantage of a convenient side
    289  * effect: these variables all have the same high 22 address bits, so only
    290  * one sethi is needed to access them all.
    291  */
    292 
    293 /*
    294  * GET_HRESTIME() returns the value of hrestime, hrestime_adj and the
    295  * number of nanoseconds since the last clock tick ('nslt').  It also
    296  * sets 'nano' to the value NANOSEC (one billion).
    297  *
    298  * This macro assumes that all registers are globals or outs so they can
    299  * safely contain 64-bit data, and that it's safe to use the label "5:".
    300  * Further, this macro calls the NATIVE_TIME_TO_NSEC_SCALE which in turn
    301  * uses the labels "6:" and "7:"; labels "5:", "6:" and "7:" must not
    302  * be used across invocations of this macro.
         *
         * Arguments (all registers):
         *	hrestsec	out: word of hrestime at offset 0
         *	hrestnsec	out: word of hrestime at offset CLONGSIZE
         *	adj		out: hrestime_adj (holds the native time before that)
         *	nslt		out: nanoseconds since hres_last_tick, clamped to 0
         *			     when the tick delta is negative
         *	nano		out: NANOSEC (holds nsec_scale until the conversion)
         *	scr		scratch: address base, then second hres_lock value
         *	hrlock		scratch: first hres_lock value with bit 0 cleared
         *	gnt1, gnt2	scratch for GET_NATIVE_TIME(); gnt1 is also the
         *			scratch register of NATIVE_TIME_TO_NSEC_SCALE
         *
         * The sequence restarts at "5:" whenever the hres_lock value read at
         * the end differs from the first value with bit 0 cleared.
         * NOTE(review): the membar #LoadLoad before the second hres_lock load
         * is commented out; presumably the memory model in use makes it
         * unnecessary -- confirm before reusing this code elsewhere.
    303  */
    304 #define	GET_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano, scr, hrlock, \
    305     gnt1, gnt2) \
    306 5:	sethi	%hi(hres_lock), scr;	/* scr = shared address base */	\
    307 	lduw	[scr + %lo(hres_lock)], hrlock;	/* load clock lock */	\
    308 	lduw	[scr + %lo(nsec_scale)], nano;	/* tick-to-ns factor */	\
    309 	andn	hrlock, 1, hrlock;  	/* see comments above! */	\
    310 	ldx	[scr + %lo(hres_last_tick)], nslt; /* tick at last clockint */ \
    311 	ldn	[scr + %lo(hrestime)], hrestsec; /* load hrestime.sec */\
    312 	add	scr, %lo(hrestime), hrestnsec;	/* hrestnsec = &hrestime */ \
    313 	ldn	[hrestnsec + CLONGSIZE], hrestnsec; /* second word of hrestime */ \
    314 	GET_NATIVE_TIME(adj, gnt1, gnt2);	/* get current %tick */	\
    315 	subcc	adj, nslt, nslt; /* nslt = ticks since last clockint */	\
    316 	movneg	%xcc, %g0, nslt; /* ignore neg delta from tick skew */	\
    317 	ldx	[scr + %lo(hrestime_adj)], adj; /* load hrestime_adj */	\
    318 	/* membar #LoadLoad; (see comment (2) above) */			\
    319 	lduw	[scr + %lo(hres_lock)], scr; /* load clock lock */	\
    320 	NATIVE_TIME_TO_NSEC_SCALE(nslt, nano, gnt1, NSEC_SHIFT); /* nslt = ns */ \
    321 	sethi	%hi(NANOSEC), nano;	/* start building NANOSEC */	\
    322 	xor	hrlock, scr, scr;	/* nonzero: lock held or changed */ \
    323 /* CSTYLED */ 								\
    324 	brnz,pn	scr, 5b;		/* writer intervened: retry */	\
    325 	or	nano, %lo(NANOSEC), nano;	/* delay: nano = NANOSEC */
    326 
    327 /*
    328  * Similar to above, but returns current gethrtime() value in 'base'.
         *
         * GET_HRTIME() loads hrtime_base, converts the ticks elapsed since
         * hres_last_tick to nanoseconds and adds the two.  It uses the same
         * hres_lock check as GET_HRESTIME() and restarts at "5:" when the
         * two lock values disagree; labels "5:", "6:" and "7:" must not be
         * used across an invocation.
         *
         * Arguments (all registers):
         *	base		out: hrtime_base + nanoseconds since last tick
         *	now		scratch: current native time
         *	nslt		scratch: tick delta, then nanoseconds
         *	scale		scratch: nsec_scale
         *	scr, hrlock	scratch: address base / lock values
         *	gnt1, gnt2	scratch for GET_NATIVE_TIME()
         *
         * NOTE(review): the second hres_lock load uses "ld" where
         * GET_HRESTIME() uses "lduw"; presumably the two mnemonics are
         * equivalent -- confirm.
    329  */
    330 #define	GET_HRTIME(base, now, nslt, scale, scr, hrlock, gnt1, gnt2)	\
    331 5:	sethi	%hi(hres_lock), scr;	/* scr = shared address base */	\
    332 	lduw	[scr + %lo(hres_lock)], hrlock;	/* load clock lock */	\
    333 	lduw	[scr + %lo(nsec_scale)], scale;	/* tick-to-ns factor */	\
    334 	andn	hrlock, 1, hrlock;  	/* see comments above! */	\
    335 	ldx	[scr + %lo(hres_last_tick)], nslt; /* tick at last clockint */ \
    336 	ldx	[scr + %lo(hrtime_base)], base;	/* load hrtime_base */	\
    337 	GET_NATIVE_TIME(now, gnt1, gnt2);	/* get current %tick */	\
    338 	subcc	now, nslt, nslt; /* nslt = ticks since last clockint */	\
    339 	movneg	%xcc, %g0, nslt; /* ignore neg delta from tick skew */	\
    340 	/* membar #LoadLoad; (see comment (2) above) */			\
    341 	ld	[scr + %lo(hres_lock)], scr; /* load clock lock */	\
    342 	NATIVE_TIME_TO_NSEC_SCALE(nslt, scale, gnt1, NSEC_SHIFT); /* nslt = ns */ \
    343 	xor	hrlock, scr, scr;	/* nonzero: lock held or changed */ \
    344 /* CSTYLED */ 								\
    345 	brnz,pn	scr, 5b;		/* writer intervened: retry */	\
    346 	add	base, nslt, base;	/* delay: base += ns since last tick */
    347 
    348 /*
    349  * Maximum-performance timestamp for kernel tracing.  We don't bother
    350  * clearing NPT because vtrace expresses everything in 32-bit deltas,
    351  * so only the low-order 32 bits matter.  We do shift down a few bits,
    352  * however, so that the trace framework doesn't emit a ridiculous number
    353  * of 32_bit_elapsed_time records (trace points are more expensive when
    354  * the time since the last trace point doesn't fit in a 16-bit delta).
    355  * We currently shift by 4 (divide by 16) on the grounds that (1) there's
    356  * no point making the timing finer-grained than the trace point latency,
    357  * which exceeds 16 cycles; and (2) the cost and probe effect of many
    358  * 32-bit time records far exceeds the cost of the 'srlx' instruction.
    359  */
    360 #define	GET_VTRACE_TIME(out, scr1, scr2)				\
    361 	GET_NATIVE_TIME(out, scr1, scr2);	/* get current %tick */	\
    362 	srlx	out, VTRACE_SHIFT, out;	/* out >>= VTRACE_SHIFT (NPT not cleared) */
    363 
    364 /*
    365  * Full 64-bit version for those truly rare occasions when you need it.
    366  * Currently this is only needed to generate the TR_START_TIME record.
    367  */
    368 #define	GET_VTRACE_TIME_64(out, scr1, scr2)				\
    369 	GET_NATIVE_TIME(out, scr1, scr2);	/* get current %tick */	\
    370 	add	out, out, out;		/* out <<= 1, which clears NPT */ \
    371 	srlx	out, VTRACE_SHIFT + 1, out;	/* undo the doubling, apply VTRACE_SHIFT */
    372 
    373 /*
    374  * Return the rate at which the vtrace clock runs.
    375  */
    376 #define	GET_VTRACE_FREQUENCY(out, scr1, scr2)				\
    377 	sethi	%hi(sys_tick_freq), out;	/* out = address base */ \
    378 	ldx	[out + %lo(sys_tick_freq)], out;	/* out = sys_tick_freq */ \
    379 	srlx	out, VTRACE_SHIFT, out;	/* same shift as GET_VTRACE_TIME; scr1, scr2 unused */
    380 
    381 #endif /* CPU_MODULE */
    382 
    383 #ifdef	__cplusplus
    384 }
    385 #endif
    386 
    387 #endif	/* !_SYS_CLOCK_H */
    388