Home | History | Annotate | Download | only in sparcv9
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 	.file	"atomic.s"
     28 
     29 #include <sys/asm_linkage.h>
     30 
/*
 * ATOMIC_BO_ENABLE_SHIFT can be selectively defined by processors
 * to enable exponential backoff.  Leaving it undefined means that
 * backoff is not desired, i.e. backoff is disabled.
 * By default, the shift value is used to generate a power-of-2
 * value for the backoff limit.  In the kernel, processors scale this
 * shift value with the number of online cpus.
 */

     39 
     40 #if defined(_KERNEL)
     41 	/*
     42 	 * Legacy kernel interfaces; they will go away (eventually).
     43 	 */
     44 	ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function)
     45 	ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function)
     46 	ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function)
     47 	ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function)
     48 	ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function)
     49 	ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function)
     50 	ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function)
     51 	ANSI_PRAGMA_WEAK2(swapl,atomic_swap_32,function)
     52 
     53 #ifdef ATOMIC_BO_ENABLE_SHIFT
     54 
     55 #if !defined(lint)
     56 	.weak   cpu_atomic_delay
     57 	.type   cpu_atomic_delay, #function
     58 #endif  /* lint */
     59 
     60 /*
     61  * For the kernel, invoke processor specific delay routine to perform
     62  * low-impact spin delay. The value of ATOMIC_BO_ENABLE_SHIFT is tuned
     63  * with respect to the specific spin delay implementation.
     64  */
     65 #define	DELAY_SPIN(label, tmp1, tmp2)					\
     66 	/*								; \
     67 	 * Define a pragma weak reference to a cpu specific		; \
     68 	 * delay routine for atomic backoff. For CPUs that		; \
     69 	 * have no such delay routine defined, the delay becomes	; \
     70 	 * just a simple tight loop.					; \
     71 	 *								; \
     72 	 * tmp1 = holds CPU specific delay routine			; \
     73 	 * tmp2 = holds atomic routine's callee return address		; \
     74 	 */								; \
     75 	sethi	%hi(cpu_atomic_delay), tmp1				; \
     76 	or	tmp1, %lo(cpu_atomic_delay), tmp1			; \
     77 label/**/0:								; \
     78 	brz,pn	tmp1, label/**/1					; \
     79 	mov	%o7, tmp2						; \
     80 	jmpl	tmp1, %o7	/* call CPU specific delay routine */	; \
     81 	  nop			/* delay slot : do nothing */		; \
     82 	mov	tmp2, %o7	/* restore callee's return address */	; \
     83 label/**/1:
     84 
     85 /*
     86  * For the kernel, we take into consideration of cas failures
     87  * and also scale the backoff limit w.r.t. the number of cpus.
     88  * For cas failures, we reset the backoff value to 1 if the cas
     89  * failures exceed or equal to the number of online cpus. This
     90  * will enforce some degree of fairness and prevent starvation.
     91  * We also scale/normalize the processor provided specific
     92  * ATOMIC_BO_ENABLE_SHIFT w.r.t. the number of online cpus to
     93  * obtain the actual final limit to use.
     94  */
     95 #define ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label)		\
     96 	brnz,pt	ncpu, label/**/0					; \
     97 	  inc	cas_cnt							; \
     98 	sethi	%hi(ncpus_online), ncpu					; \
     99 	ld	[ncpu + %lo(ncpus_online)], ncpu			; \
    100 label/**/0:								; \
    101 	cmp	cas_cnt, ncpu						; \
    102 	blu,pt	%xcc, label/**/1					; \
    103 	  sllx	ncpu, ATOMIC_BO_ENABLE_SHIFT, limit			; \
    104 	mov	%g0, cas_cnt						; \
    105 	mov	1, val							; \
    106 label/**/1:
    107 #endif	/* ATOMIC_BO_ENABLE_SHIFT */
    108 
    109 #else	/* _KERNEL */
    110 
    111 /*
    112  * ATOMIC_BO_ENABLE_SHIFT may be enabled/defined here for generic
    113  * libc atomics. None for now.
    114  */
    115 #ifdef ATOMIC_BO_ENABLE_SHIFT
    116 #define	DELAY_SPIN(label, tmp1, tmp2)	\
    117 label/**/0:
    118 
    119 #define ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label)  \
    120 	set	1 << ATOMIC_BO_ENABLE_SHIFT, limit
    121 #endif	/* ATOMIC_BO_ENABLE_SHIFT */
    122 #endif	/* _KERNEL */
    123 
    124 #ifdef ATOMIC_BO_ENABLE_SHIFT
    125 /*
    126  * ATOMIC_BACKOFF_INIT macro for initialization.
    127  * backoff val is initialized to 1.
    128  * ncpu is initialized to 0
    129  * The cas_cnt counts the cas instruction failure and is
    130  * initialized to 0.
    131  */
    132 #define ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt)	\
    133 	mov	1, val				; \
    134 	mov	%g0, ncpu			; \
    135 	mov	%g0, cas_cnt
    136 
    137 #define ATOMIC_BACKOFF_BRANCH(cr, backoff, loop) \
    138 	bne,a,pn cr, backoff
    139 
    140 /*
    141  * Main ATOMIC_BACKOFF_BACKOFF macro for backoff.
    142  */
    143 #define ATOMIC_BACKOFF_BACKOFF(val, limit, ncpu, cas_cnt, label, retlabel) \
    144 	ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label/**/_0)	; \
    145 	cmp	val, limit						; \
    146 	blu,a,pt %xcc, label/**/_1					; \
    147 	  mov	val, limit						; \
    148 label/**/_1:								; \
    149 	mov	limit, val						; \
    150 	DELAY_SPIN(label/**/_2, %g2, %g3)				; \
    151 	deccc	limit							; \
    152 	bgu,pn	%xcc, label/**/_20 /* branch to middle of DELAY_SPIN */	; \
    153 	  nop								; \
    154 	ba	retlabel						; \
    155 	sllx	val, 1, val
    156 
    157 #else	/* ATOMIC_BO_ENABLE_SHIFT */
    158 #define ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt)
    159 
    160 #define ATOMIC_BACKOFF_BRANCH(cr, backoff, loop) \
    161 	bne,a,pn cr, loop
    162 
    163 #define ATOMIC_BACKOFF_BACKOFF(val, limit, ncpu, cas_cnt, label, retlabel)
    164 #endif	/* ATOMIC_BO_ENABLE_SHIFT */
    165 
    166 	/*
    167 	 * NOTE: If atomic_inc_8 and atomic_inc_8_nv are ever
    168 	 * separated, you need to also edit the libc sparcv9 platform
    169 	 * specific mapfile and remove the NODYNSORT attribute
    170 	 * from atomic_inc_8_nv.
    171 	 */
    172 	ENTRY(atomic_inc_8)
    173 	ALTENTRY(atomic_inc_8_nv)
    174 	ALTENTRY(atomic_inc_uchar)
    175 	ALTENTRY(atomic_inc_uchar_nv)
    176 	ba	add_8
    177 	  add	%g0, 1, %o1
    178 	SET_SIZE(atomic_inc_uchar_nv)
    179 	SET_SIZE(atomic_inc_uchar)
    180 	SET_SIZE(atomic_inc_8_nv)
    181 	SET_SIZE(atomic_inc_8)
    182 
    183 	/*
    184 	 * NOTE: If atomic_dec_8 and atomic_dec_8_nv are ever
    185 	 * separated, you need to also edit the libc sparcv9 platform
    186 	 * specific mapfile and remove the NODYNSORT attribute
    187 	 * from atomic_dec_8_nv.
    188 	 */
    189 	ENTRY(atomic_dec_8)
    190 	ALTENTRY(atomic_dec_8_nv)
    191 	ALTENTRY(atomic_dec_uchar)
    192 	ALTENTRY(atomic_dec_uchar_nv)
    193 	ba	add_8
    194 	  sub	%g0, 1, %o1
    195 	SET_SIZE(atomic_dec_uchar_nv)
    196 	SET_SIZE(atomic_dec_uchar)
    197 	SET_SIZE(atomic_dec_8_nv)
    198 	SET_SIZE(atomic_dec_8)
    199 
    200 	/*
    201 	 * NOTE: If atomic_add_8 and atomic_add_8_nv are ever
    202 	 * separated, you need to also edit the libc sparcv9 platform
    203 	 * specific mapfile and remove the NODYNSORT attribute
    204 	 * from atomic_add_8_nv.
    205 	 */
    206 	ENTRY(atomic_add_8)
    207 	ALTENTRY(atomic_add_8_nv)
    208 	ALTENTRY(atomic_add_char)
    209 	ALTENTRY(atomic_add_char_nv)
    210 add_8:
    211 	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
    212 	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
    213 	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
    214 	set	0xff, %o3		! %o3 = mask
    215 	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
    216 	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
    217 	and	%o1, %o3, %o1		! %o1 = single byte value
    218 	andn	%o0, 0x3, %o0		! %o0 = word address
    219 	ld	[%o0], %o2		! read old value
    220 1:
    221 	add	%o2, %o1, %o5		! add value to the old value
    222 	and	%o5, %o3, %o5		! clear other bits
    223 	andn	%o2, %o3, %o4		! clear target bits
    224 	or	%o4, %o5, %o5		! insert the new value
    225 	cas	[%o0], %o2, %o5
    226 	cmp	%o2, %o5
    227 	bne,a,pn %icc, 1b
    228 	  mov	%o5, %o2		! %o2 = old value
    229 	add	%o2, %o1, %o5
    230 	and	%o5, %o3, %o5
    231 	retl
    232 	srl	%o5, %g1, %o0		! %o0 = new value
    233 	SET_SIZE(atomic_add_char_nv)
    234 	SET_SIZE(atomic_add_char)
    235 	SET_SIZE(atomic_add_8_nv)
    236 	SET_SIZE(atomic_add_8)
    237 
    238 	/*
    239 	 * NOTE: If atomic_inc_16 and atomic_inc_16_nv are ever
    240 	 * separated, you need to also edit the libc sparcv9 platform
    241 	 * specific mapfile and remove the NODYNSORT attribute
    242 	 * from atomic_inc_16_nv.
    243 	 */
    244 	ENTRY(atomic_inc_16)
    245 	ALTENTRY(atomic_inc_16_nv)
    246 	ALTENTRY(atomic_inc_ushort)
    247 	ALTENTRY(atomic_inc_ushort_nv)
    248 	ba	add_16
    249 	  add	%g0, 1, %o1
    250 	SET_SIZE(atomic_inc_ushort_nv)
    251 	SET_SIZE(atomic_inc_ushort)
    252 	SET_SIZE(atomic_inc_16_nv)
    253 	SET_SIZE(atomic_inc_16)
    254 
    255 	/*
    256 	 * NOTE: If atomic_dec_16 and atomic_dec_16_nv are ever
    257 	 * separated, you need to also edit the libc sparcv9 platform
    258 	 * specific mapfile and remove the NODYNSORT attribute
    259 	 * from atomic_dec_16_nv.
    260 	 */
    261 	ENTRY(atomic_dec_16)
    262 	ALTENTRY(atomic_dec_16_nv)
    263 	ALTENTRY(atomic_dec_ushort)
    264 	ALTENTRY(atomic_dec_ushort_nv)
    265 	ba	add_16
    266 	  sub	%g0, 1, %o1
    267 	SET_SIZE(atomic_dec_ushort_nv)
    268 	SET_SIZE(atomic_dec_ushort)
    269 	SET_SIZE(atomic_dec_16_nv)
    270 	SET_SIZE(atomic_dec_16)
    271 
    272 	/*
    273 	 * NOTE: If atomic_add_16 and atomic_add_16_nv are ever
    274 	 * separated, you need to also edit the libc sparcv9 platform
    275 	 * specific mapfile and remove the NODYNSORT attribute
    276 	 * from atomic_add_16_nv.
    277 	 */
    278 	ENTRY(atomic_add_16)
    279 	ALTENTRY(atomic_add_16_nv)
    280 	ALTENTRY(atomic_add_short)
    281 	ALTENTRY(atomic_add_short_nv)
    282 add_16:
    283 	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
    284 	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
    285 	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
    286 	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
    287 	sethi	%hi(0xffff0000), %o3	! %o3 = mask
    288 	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
    289 	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
    290 	and	%o1, %o3, %o1		! %o1 = single short value
    291 	andn	%o0, 0x2, %o0		! %o0 = word address
    292 	! if low-order bit is 1, we will properly get an alignment fault here
    293 	ld	[%o0], %o2		! read old value
    294 1:
    295 	add	%o1, %o2, %o5		! add value to the old value
    296 	and	%o5, %o3, %o5		! clear other bits
    297 	andn	%o2, %o3, %o4		! clear target bits
    298 	or	%o4, %o5, %o5		! insert the new value
    299 	cas	[%o0], %o2, %o5
    300 	cmp	%o2, %o5
    301 	bne,a,pn %icc, 1b
    302 	  mov	%o5, %o2		! %o2 = old value
    303 	add	%o1, %o2, %o5
    304 	and	%o5, %o3, %o5
    305 	retl
    306 	srl	%o5, %g1, %o0		! %o0 = new value
    307 	SET_SIZE(atomic_add_short_nv)
    308 	SET_SIZE(atomic_add_short)
    309 	SET_SIZE(atomic_add_16_nv)
    310 	SET_SIZE(atomic_add_16)
    311 
    312 	/*
    313 	 * NOTE: If atomic_inc_32 and atomic_inc_32_nv are ever
    314 	 * separated, you need to also edit the libc sparcv9 platform
    315 	 * specific mapfile and remove the NODYNSORT attribute
    316 	 * from atomic_inc_32_nv.
    317 	 */
    318 	ENTRY(atomic_inc_32)
    319 	ALTENTRY(atomic_inc_32_nv)
    320 	ALTENTRY(atomic_inc_uint)
    321 	ALTENTRY(atomic_inc_uint_nv)
    322 	ba	add_32
    323 	  add	%g0, 1, %o1
    324 	SET_SIZE(atomic_inc_uint_nv)
    325 	SET_SIZE(atomic_inc_uint)
    326 	SET_SIZE(atomic_inc_32_nv)
    327 	SET_SIZE(atomic_inc_32)
    328 
    329 	/*
    330 	 * NOTE: If atomic_dec_32 and atomic_dec_32_nv are ever
    331 	 * separated, you need to also edit the libc sparcv9 platform
    332 	 * specific mapfile and remove the NODYNSORT attribute
    333 	 * from atomic_dec_32_nv.
    334 	 */
    335 	ENTRY(atomic_dec_32)
    336 	ALTENTRY(atomic_dec_32_nv)
    337 	ALTENTRY(atomic_dec_uint)
    338 	ALTENTRY(atomic_dec_uint_nv)
    339 	ba	add_32
    340 	  sub	%g0, 1, %o1
    341 	SET_SIZE(atomic_dec_uint_nv)
    342 	SET_SIZE(atomic_dec_uint)
    343 	SET_SIZE(atomic_dec_32_nv)
    344 	SET_SIZE(atomic_dec_32)
    345 
    346 	/*
    347 	 * NOTE: If atomic_add_32 and atomic_add_32_nv are ever
    348 	 * separated, you need to also edit the libc sparcv9 platform
    349 	 * specific mapfile and remove the NODYNSORT attribute
    350 	 * from atomic_add_32_nv.
    351 	 */
    352 	ENTRY(atomic_add_32)
    353 	ALTENTRY(atomic_add_32_nv)
    354 	ALTENTRY(atomic_add_int)
    355 	ALTENTRY(atomic_add_int_nv)
    356 add_32:
    357 	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
    358 0:
    359 	ld	[%o0], %o2
    360 1:
    361 	add	%o2, %o1, %o3
    362 	cas	[%o0], %o2, %o3
    363 	cmp	%o2, %o3
    364 	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
    365 	  mov	%o3, %o2
    366 	retl
    367 	add	%o2, %o1, %o0		! return new value
    368 2:
    369 	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, add32, 0b)
    370 	SET_SIZE(atomic_add_int_nv)
    371 	SET_SIZE(atomic_add_int)
    372 	SET_SIZE(atomic_add_32_nv)
    373 	SET_SIZE(atomic_add_32)
    374 
    375 	/*
    376 	 * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever
    377 	 * separated, you need to also edit the libc sparcv9 platform
    378 	 * specific mapfile and remove the NODYNSORT attribute
    379 	 * from atomic_inc_64_nv.
    380 	 */
    381 	ENTRY(atomic_inc_64)
    382 	ALTENTRY(atomic_inc_64_nv)
    383 	ALTENTRY(atomic_inc_ulong)
    384 	ALTENTRY(atomic_inc_ulong_nv)
    385 	ba	add_64
    386 	  add	%g0, 1, %o1
    387 	SET_SIZE(atomic_inc_ulong_nv)
    388 	SET_SIZE(atomic_inc_ulong)
    389 	SET_SIZE(atomic_inc_64_nv)
    390 	SET_SIZE(atomic_inc_64)
    391 
    392 	/*
    393 	 * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever
    394 	 * separated, you need to also edit the libc sparcv9 platform
    395 	 * specific mapfile and remove the NODYNSORT attribute
    396 	 * from atomic_dec_64_nv.
    397 	 */
    398 	ENTRY(atomic_dec_64)
    399 	ALTENTRY(atomic_dec_64_nv)
    400 	ALTENTRY(atomic_dec_ulong)
    401 	ALTENTRY(atomic_dec_ulong_nv)
    402 	ba	add_64
    403 	  sub	%g0, 1, %o1
    404 	SET_SIZE(atomic_dec_ulong_nv)
    405 	SET_SIZE(atomic_dec_ulong)
    406 	SET_SIZE(atomic_dec_64_nv)
    407 	SET_SIZE(atomic_dec_64)
    408 
    409 	/*
    410 	 * NOTE: If atomic_add_64 and atomic_add_64_nv are ever
    411 	 * separated, you need to also edit the libc sparcv9 platform
    412 	 * specific mapfile and remove the NODYNSORT attribute
    413 	 * from atomic_add_64_nv.
    414 	 */
    415 	ENTRY(atomic_add_64)
    416 	ALTENTRY(atomic_add_64_nv)
    417 	ALTENTRY(atomic_add_ptr)
    418 	ALTENTRY(atomic_add_ptr_nv)
    419 	ALTENTRY(atomic_add_long)
    420 	ALTENTRY(atomic_add_long_nv)
    421 add_64:
    422 	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
    423 0:
    424 	ldx	[%o0], %o2
    425 1:
    426 	add	%o2, %o1, %o3
    427 	casx	[%o0], %o2, %o3
    428 	cmp	%o2, %o3
    429 	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
    430 	  mov	%o3, %o2
    431 	retl
    432 	add	%o2, %o1, %o0		! return new value
    433 2:
    434 	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, add64, 0b)
    435 	SET_SIZE(atomic_add_long_nv)
    436 	SET_SIZE(atomic_add_long)
    437 	SET_SIZE(atomic_add_ptr_nv)
    438 	SET_SIZE(atomic_add_ptr)
    439 	SET_SIZE(atomic_add_64_nv)
    440 	SET_SIZE(atomic_add_64)
    441 
    442 	/*
    443 	 * NOTE: If atomic_or_8 and atomic_or_8_nv are ever
    444 	 * separated, you need to also edit the libc sparcv9 platform
    445 	 * specific mapfile and remove the NODYNSORT attribute
    446 	 * from atomic_or_8_nv.
    447 	 */
    448 	ENTRY(atomic_or_8)
    449 	ALTENTRY(atomic_or_8_nv)
    450 	ALTENTRY(atomic_or_uchar)
    451 	ALTENTRY(atomic_or_uchar_nv)
    452 	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
    453 	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
    454 	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
    455 	set	0xff, %o3		! %o3 = mask
    456 	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
    457 	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
    458 	and	%o1, %o3, %o1		! %o1 = single byte value
    459 	andn	%o0, 0x3, %o0		! %o0 = word address
    460 	ld	[%o0], %o2		! read old value
    461 1:
    462 	or	%o2, %o1, %o5		! or in the new value
    463 	cas	[%o0], %o2, %o5
    464 	cmp	%o2, %o5
    465 	bne,a,pn %icc, 1b
    466 	  mov	%o5, %o2		! %o2 = old value
    467 	or	%o2, %o1, %o5
    468 	and	%o5, %o3, %o5
    469 	retl
    470 	srl	%o5, %g1, %o0		! %o0 = new value
    471 	SET_SIZE(atomic_or_uchar_nv)
    472 	SET_SIZE(atomic_or_uchar)
    473 	SET_SIZE(atomic_or_8_nv)
    474 	SET_SIZE(atomic_or_8)
    475 
    476 	/*
    477 	 * NOTE: If atomic_or_16 and atomic_or_16_nv are ever
    478 	 * separated, you need to also edit the libc sparcv9 platform
    479 	 * specific mapfile and remove the NODYNSORT attribute
    480 	 * from atomic_or_16_nv.
    481 	 */
    482 	ENTRY(atomic_or_16)
    483 	ALTENTRY(atomic_or_16_nv)
    484 	ALTENTRY(atomic_or_ushort)
    485 	ALTENTRY(atomic_or_ushort_nv)
    486 	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
    487 	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
    488 	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
    489 	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
    490 	sethi	%hi(0xffff0000), %o3	! %o3 = mask
    491 	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
    492 	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
    493 	and	%o1, %o3, %o1		! %o1 = single short value
    494 	andn	%o0, 0x2, %o0		! %o0 = word address
    495 	! if low-order bit is 1, we will properly get an alignment fault here
    496 	ld	[%o0], %o2		! read old value
    497 1:
    498 	or	%o2, %o1, %o5		! or in the new value
    499 	cas	[%o0], %o2, %o5
    500 	cmp	%o2, %o5
    501 	bne,a,pn %icc, 1b
    502 	  mov	%o5, %o2		! %o2 = old value
    503 	or	%o2, %o1, %o5		! or in the new value
    504 	and	%o5, %o3, %o5
    505 	retl
    506 	srl	%o5, %g1, %o0		! %o0 = new value
    507 	SET_SIZE(atomic_or_ushort_nv)
    508 	SET_SIZE(atomic_or_ushort)
    509 	SET_SIZE(atomic_or_16_nv)
    510 	SET_SIZE(atomic_or_16)
    511 
    512 	/*
    513 	 * NOTE: If atomic_or_32 and atomic_or_32_nv are ever
    514 	 * separated, you need to also edit the libc sparcv9 platform
    515 	 * specific mapfile and remove the NODYNSORT attribute
    516 	 * from atomic_or_32_nv.
    517 	 */
    518 	ENTRY(atomic_or_32)
    519 	ALTENTRY(atomic_or_32_nv)
    520 	ALTENTRY(atomic_or_uint)
    521 	ALTENTRY(atomic_or_uint_nv)
    522 	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
    523 0:
    524 	ld	[%o0], %o2
    525 1:
    526 	or	%o2, %o1, %o3
    527 	cas	[%o0], %o2, %o3
    528 	cmp	%o2, %o3
    529 	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
    530 	  mov	%o3, %o2
    531 	retl
    532 	or	%o2, %o1, %o0		! return new value
    533 2:
    534 	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, or32, 0b)
    535 	SET_SIZE(atomic_or_uint_nv)
    536 	SET_SIZE(atomic_or_uint)
    537 	SET_SIZE(atomic_or_32_nv)
    538 	SET_SIZE(atomic_or_32)
    539 
    540 	/*
    541 	 * NOTE: If atomic_or_64 and atomic_or_64_nv are ever
    542 	 * separated, you need to also edit the libc sparcv9 platform
    543 	 * specific mapfile and remove the NODYNSORT attribute
    544 	 * from atomic_or_64_nv.
    545 	 */
    546 	ENTRY(atomic_or_64)
    547 	ALTENTRY(atomic_or_64_nv)
    548 	ALTENTRY(atomic_or_ulong)
    549 	ALTENTRY(atomic_or_ulong_nv)
    550 	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
    551 0:
    552 	ldx	[%o0], %o2
    553 1:
    554 	or	%o2, %o1, %o3
    555 	casx	[%o0], %o2, %o3
    556 	cmp	%o2, %o3
    557 	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
    558 	  mov	%o3, %o2
    559 	retl
    560 	or	%o2, %o1, %o0		! return new value
    561 2:
    562 	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, or64, 0b)
    563 	SET_SIZE(atomic_or_ulong_nv)
    564 	SET_SIZE(atomic_or_ulong)
    565 	SET_SIZE(atomic_or_64_nv)
    566 	SET_SIZE(atomic_or_64)
    567 
    568 	/*
    569 	 * NOTE: If atomic_and_8 and atomic_and_8_nv are ever
    570 	 * separated, you need to also edit the libc sparcv9 platform
    571 	 * specific mapfile and remove the NODYNSORT attribute
    572 	 * from atomic_and_8_nv.
    573 	 */
    574 	ENTRY(atomic_and_8)
    575 	ALTENTRY(atomic_and_8_nv)
    576 	ALTENTRY(atomic_and_uchar)
    577 	ALTENTRY(atomic_and_uchar_nv)
    578 	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
    579 	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
    580 	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
    581 	set	0xff, %o3		! %o3 = mask
    582 	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
    583 	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
    584 	orn	%o1, %o3, %o1		! all ones in other bytes
    585 	andn	%o0, 0x3, %o0		! %o0 = word address
    586 	ld	[%o0], %o2		! read old value
    587 1:
    588 	and	%o2, %o1, %o5		! and in the new value
    589 	cas	[%o0], %o2, %o5
    590 	cmp	%o2, %o5
    591 	bne,a,pn %icc, 1b
    592 	  mov	%o5, %o2		! %o2 = old value
    593 	and	%o2, %o1, %o5
    594 	and	%o5, %o3, %o5
    595 	retl
    596 	srl	%o5, %g1, %o0		! %o0 = new value
    597 	SET_SIZE(atomic_and_uchar_nv)
    598 	SET_SIZE(atomic_and_uchar)
    599 	SET_SIZE(atomic_and_8_nv)
    600 	SET_SIZE(atomic_and_8)
    601 
    602 	/*
    603 	 * NOTE: If atomic_and_16 and atomic_and_16_nv are ever
    604 	 * separated, you need to also edit the libc sparcv9 platform
    605 	 * specific mapfile and remove the NODYNSORT attribute
    606 	 * from atomic_and_16_nv.
    607 	 */
    608 	ENTRY(atomic_and_16)
    609 	ALTENTRY(atomic_and_16_nv)
    610 	ALTENTRY(atomic_and_ushort)
    611 	ALTENTRY(atomic_and_ushort_nv)
    612 	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
    613 	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
    614 	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
    615 	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
    616 	sethi	%hi(0xffff0000), %o3	! %o3 = mask
    617 	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
    618 	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
    619 	orn	%o1, %o3, %o1		! all ones in the other half
    620 	andn	%o0, 0x2, %o0		! %o0 = word address
    621 	! if low-order bit is 1, we will properly get an alignment fault here
    622 	ld	[%o0], %o2		! read old value
    623 1:
    624 	and	%o2, %o1, %o5		! and in the new value
    625 	cas	[%o0], %o2, %o5
    626 	cmp	%o2, %o5
    627 	bne,a,pn %icc, 1b
    628 	  mov	%o5, %o2		! %o2 = old value
    629 	and	%o2, %o1, %o5
    630 	and	%o5, %o3, %o5
    631 	retl
    632 	srl	%o5, %g1, %o0		! %o0 = new value
    633 	SET_SIZE(atomic_and_ushort_nv)
    634 	SET_SIZE(atomic_and_ushort)
    635 	SET_SIZE(atomic_and_16_nv)
    636 	SET_SIZE(atomic_and_16)
    637 
    638 	/*
    639 	 * NOTE: If atomic_and_32 and atomic_and_32_nv are ever
    640 	 * separated, you need to also edit the libc sparcv9 platform
    641 	 * specific mapfile and remove the NODYNSORT attribute
    642 	 * from atomic_and_32_nv.
    643 	 */
    644 	ENTRY(atomic_and_32)
    645 	ALTENTRY(atomic_and_32_nv)
    646 	ALTENTRY(atomic_and_uint)
    647 	ALTENTRY(atomic_and_uint_nv)
    648 	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
    649 0:
    650 	ld	[%o0], %o2
    651 1:
    652 	and	%o2, %o1, %o3
    653 	cas	[%o0], %o2, %o3
    654 	cmp	%o2, %o3
    655 	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
    656 	  mov	%o3, %o2
    657 	retl
    658 	and	%o2, %o1, %o0		! return new value
    659 2:
    660 	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, and32, 0b)
    661 	SET_SIZE(atomic_and_uint_nv)
    662 	SET_SIZE(atomic_and_uint)
    663 	SET_SIZE(atomic_and_32_nv)
    664 	SET_SIZE(atomic_and_32)
    665 
    666 	/*
    667 	 * NOTE: If atomic_and_64 and atomic_and_64_nv are ever
    668 	 * separated, you need to also edit the libc sparcv9 platform
    669 	 * specific mapfile and remove the NODYNSORT attribute
    670 	 * from atomic_and_64_nv.
    671 	 */
    672 	ENTRY(atomic_and_64)
    673 	ALTENTRY(atomic_and_64_nv)
    674 	ALTENTRY(atomic_and_ulong)
    675 	ALTENTRY(atomic_and_ulong_nv)
    676 	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
    677 0:
    678 	ldx	[%o0], %o2
    679 1:
    680 	and	%o2, %o1, %o3
    681 	casx	[%o0], %o2, %o3
    682 	cmp	%o2, %o3
    683 	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
    684 	  mov	%o3, %o2
    685 	retl
    686 	and	%o2, %o1, %o0		! return new value
    687 2:
    688 	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, and64, 0b)
    689 	SET_SIZE(atomic_and_ulong_nv)
    690 	SET_SIZE(atomic_and_ulong)
    691 	SET_SIZE(atomic_and_64_nv)
    692 	SET_SIZE(atomic_and_64)
    693 
    694 	ENTRY(atomic_cas_8)
    695 	ALTENTRY(atomic_cas_uchar)
    696 	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
    697 	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
    698 	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
    699 	set	0xff, %o3		! %o3 = mask
    700 	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
    701 	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
    702 	and	%o1, %o3, %o1		! %o1 = single byte value
    703 	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
    704 	and	%o2, %o3, %o2		! %o2 = single byte value
    705 	andn	%o0, 0x3, %o0		! %o0 = word address
    706 	ld	[%o0], %o4		! read old value
    707 1:
    708 	andn	%o4, %o3, %o4		! clear target bits
    709 	or	%o4, %o2, %o5		! insert the new value
    710 	or	%o4, %o1, %o4		! insert the comparison value
    711 	cas	[%o0], %o4, %o5
    712 	cmp	%o4, %o5		! did we succeed?
    713 	be,pt	%icc, 2f
    714 	  and	%o5, %o3, %o4		! isolate the old value
    715 	cmp	%o1, %o4		! should we have succeeded?
    716 	be,a,pt	%icc, 1b		! yes, try again
    717 	  mov	%o5, %o4		! %o4 = old value
    718 2:
    719 	retl
    720 	srl	%o4, %g1, %o0		! %o0 = old value
    721 	SET_SIZE(atomic_cas_uchar)
    722 	SET_SIZE(atomic_cas_8)
    723 
    724 	ENTRY(atomic_cas_16)
    725 	ALTENTRY(atomic_cas_ushort)
    726 	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
    727 	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
    728 	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
    729 	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
    730 	sethi	%hi(0xffff0000), %o3	! %o3 = mask
    731 	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
    732 	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
    733 	and	%o1, %o3, %o1		! %o1 = single short value
    734 	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
    735 	and	%o2, %o3, %o2		! %o2 = single short value
    736 	andn	%o0, 0x2, %o0		! %o0 = word address
    737 	! if low-order bit is 1, we will properly get an alignment fault here
    738 	ld	[%o0], %o4		! read old value
    739 1:
    740 	andn	%o4, %o3, %o4		! clear target bits
    741 	or	%o4, %o2, %o5		! insert the new value
    742 	or	%o4, %o1, %o4		! insert the comparison value
    743 	cas	[%o0], %o4, %o5
    744 	cmp	%o4, %o5		! did we succeed?
    745 	be,pt	%icc, 2f
    746 	  and	%o5, %o3, %o4		! isolate the old value
    747 	cmp	%o1, %o4		! should we have succeeded?
    748 	be,a,pt	%icc, 1b		! yes, try again
    749 	  mov	%o5, %o4		! %o4 = old value
    750 2:
    751 	retl
    752 	srl	%o4, %g1, %o0		! %o0 = old value
    753 	SET_SIZE(atomic_cas_ushort)
    754 	SET_SIZE(atomic_cas_16)
    755 
    756 	ENTRY(atomic_cas_32)
    757 	ALTENTRY(atomic_cas_uint)
    758 	cas	[%o0], %o1, %o2
    759 	retl
    760 	mov	%o2, %o0
    761 	SET_SIZE(atomic_cas_uint)
    762 	SET_SIZE(atomic_cas_32)
    763 
    764 	ENTRY(atomic_cas_64)
    765 	ALTENTRY(atomic_cas_ptr)
    766 	ALTENTRY(atomic_cas_ulong)
    767 	casx	[%o0], %o1, %o2
    768 	retl
    769 	mov	%o2, %o0
    770 	SET_SIZE(atomic_cas_ulong)
    771 	SET_SIZE(atomic_cas_ptr)
    772 	SET_SIZE(atomic_cas_64)
    773 
    774 	ENTRY(atomic_swap_8)
    775 	ALTENTRY(atomic_swap_uchar)
    776 	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
    777 	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
    778 	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
    779 	set	0xff, %o3		! %o3 = mask
    780 	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
    781 	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
    782 	and	%o1, %o3, %o1		! %o1 = single byte value
    783 	andn	%o0, 0x3, %o0		! %o0 = word address
    784 	ld	[%o0], %o2		! read old value
    785 1:
    786 	andn	%o2, %o3, %o5		! clear target bits
    787 	or	%o5, %o1, %o5		! insert the new value
    788 	cas	[%o0], %o2, %o5
    789 	cmp	%o2, %o5
    790 	bne,a,pn %icc, 1b
    791 	  mov	%o5, %o2		! %o2 = old value
    792 	and	%o5, %o3, %o5
    793 	retl
    794 	srl	%o5, %g1, %o0		! %o0 = old value
    795 	SET_SIZE(atomic_swap_uchar)
    796 	SET_SIZE(atomic_swap_8)
    797 
    798 	ENTRY(atomic_swap_16)
    799 	ALTENTRY(atomic_swap_ushort)
    800 	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
    801 	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
    802 	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
    803 	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
    804 	sethi	%hi(0xffff0000), %o3	! %o3 = mask
    805 	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
    806 	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
    807 	and	%o1, %o3, %o1		! %o1 = single short value
    808 	andn	%o0, 0x2, %o0		! %o0 = word address
    809 	! if low-order bit is 1, we will properly get an alignment fault here
    810 	ld	[%o0], %o2		! read old value
    811 1:
    812 	andn	%o2, %o3, %o5		! clear target bits
    813 	or	%o5, %o1, %o5		! insert the new value
    814 	cas	[%o0], %o2, %o5
    815 	cmp	%o2, %o5
    816 	bne,a,pn %icc, 1b
    817 	  mov	%o5, %o2		! %o2 = old value
    818 	and	%o5, %o3, %o5
    819 	retl
    820 	srl	%o5, %g1, %o0		! %o0 = old value
    821 	SET_SIZE(atomic_swap_ushort)
    822 	SET_SIZE(atomic_swap_16)
    823 
    824 	ENTRY(atomic_swap_32)
    825 	ALTENTRY(atomic_swap_uint)
    826 	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
    827 0:
    828 	ld	[%o0], %o2
    829 1:
    830 	mov	%o1, %o3
    831 	cas	[%o0], %o2, %o3
    832 	cmp	%o2, %o3
    833 	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
    834 	  mov	%o3, %o2
    835 	retl
    836 	mov	%o3, %o0
    837 2:
    838 	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, swap32, 0b)
    839 	SET_SIZE(atomic_swap_uint)
    840 	SET_SIZE(atomic_swap_32)
    841 
    842 	ENTRY(atomic_swap_64)
    843 	ALTENTRY(atomic_swap_ptr)
    844 	ALTENTRY(atomic_swap_ulong)
    845 	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
    846 0:
    847 	ldx	[%o0], %o2
    848 1:
    849 	mov	%o1, %o3
    850 	casx	[%o0], %o2, %o3
    851 	cmp	%o2, %o3
    852 	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
    853 	  mov	%o3, %o2
    854 	retl
    855 	mov	%o3, %o0
    856 2:
    857 	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, swap64, 0b)
    858 	SET_SIZE(atomic_swap_ulong)
    859 	SET_SIZE(atomic_swap_ptr)
    860 	SET_SIZE(atomic_swap_64)
    861 
    862 	ENTRY(atomic_set_long_excl)
    863 	ATOMIC_BACKOFF_INIT(%o5, %g4, %g5)
    864 	mov	1, %o3
    865 	slln	%o3, %o1, %o3
    866 0:
    867 	ldn	[%o0], %o2
    868 1:
    869 	andcc	%o2, %o3, %g0		! test if the bit is set
    870 	bnz,a,pn %ncc, 2f		! if so, then fail out
    871 	  mov	-1, %o0
    872 	or	%o2, %o3, %o4		! set the bit, and try to commit it
    873 	casn	[%o0], %o2, %o4
    874 	cmp	%o2, %o4
    875 	ATOMIC_BACKOFF_BRANCH(%ncc, 5f, 1b)
    876 	  mov	%o4, %o2
    877 	mov	%g0, %o0
    878 2:
    879 	retl
    880 	nop
    881 5:
    882 	ATOMIC_BACKOFF_BACKOFF(%o5, %g1, %g4, %g5, setlongexcl, 0b)
    883 	SET_SIZE(atomic_set_long_excl)
    884 
    885 	ENTRY(atomic_clear_long_excl)
    886 	ATOMIC_BACKOFF_INIT(%o5, %g4, %g5)
    887 	mov	1, %o3
    888 	slln	%o3, %o1, %o3
    889 0:
    890 	ldn	[%o0], %o2
    891 1:
    892 	andncc	%o3, %o2, %g0		! test if the bit is clear
    893 	bnz,a,pn %ncc, 2f		! if so, then fail out
    894 	  mov	-1, %o0
    895 	andn	%o2, %o3, %o4		! clear the bit, and try to commit it
    896 	casn	[%o0], %o2, %o4
    897 	cmp	%o2, %o4
    898 	ATOMIC_BACKOFF_BRANCH(%ncc, 5f, 1b)
    899 	  mov	%o4, %o2
    900 	mov	%g0, %o0
    901 2:
    902 	retl
    903 	nop
    904 5:
    905 	ATOMIC_BACKOFF_BACKOFF(%o5, %g1, %g4, %g5, clrlongexcl, 0b)
    906 	SET_SIZE(atomic_clear_long_excl)
    907 
    908 #if !defined(_KERNEL)
    909 
    910 	/*
    911 	 * Spitfires and Blackbirds have a problem with membars in the
    912 	 * delay slot (SF_ERRATA_51).  For safety's sake, we assume
    913 	 * that the whole world needs the workaround.
    914 	 */
    915 	ENTRY(membar_enter)
    916 	membar	#StoreLoad|#StoreStore
    917 	retl
    918 	nop
    919 	SET_SIZE(membar_enter)
    920 
    921 	ENTRY(membar_exit)
    922 	membar	#LoadStore|#StoreStore
    923 	retl
    924 	nop
    925 	SET_SIZE(membar_exit)
    926 
    927 	ENTRY(membar_producer)
    928 	membar	#StoreStore
    929 	retl
    930 	nop
    931 	SET_SIZE(membar_producer)
    932 
    933 	ENTRY(membar_consumer)
    934 	membar	#LoadLoad
    935 	retl
    936 	nop
    937 	SET_SIZE(membar_consumer)
    938 
    939 #endif	/* !_KERNEL */
    940