Home | History | Annotate | Download | only in ml
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /
     27 / In-line functions for amd64 kernels.
     28 /
     29 
     30 /
     31 / return current thread pointer
     32 /
     33 / NOTE: the "0x18" should be replaced by the computed value of the
     34 /	offset of "cpu_thread" from the beginning of the struct cpu.
     35 /	Including "assym.h" does not work, however, since that stuff
     36 /	is PSM-specific and is only visible to the 'unix' build anyway.
     37 /	Same with current cpu pointer, where "0x10" should be replaced
     38 /	by the computed value of the offset of "cpu_self".
     39 /	Ugh -- what a disaster.
     40 /
     /*
      * threadp: load the quadword at %gs:0x18 into %rax.
      * 0x18 is a hard-coded copy of the offset of "cpu_thread" within
      * struct cpu (see the note above); it is not derived from assym.h.
      * NOTE(review): presumably the %gs base addresses the current CPU's
      * struct cpu -- confirm against the kernel's %gs setup.
      */
     41 	.inline	threadp,0
     42 	movq	%gs:0x18, %rax
     43 	.end
     44 
     45 /
     46 / return current cpu pointer
     47 /
     /*
      * curcpup: load the quadword at %gs:0x10 into %rax.
      * 0x10 is the hard-coded offset that the threadp note refers to as
      * the offset of "cpu_self".
      */
     48 	.inline	curcpup,0
     49 	movq	%gs:0x10, %rax
     50 	.end
     51 
     52 /
     53 / return caller
     54 /
     /*
      * caller: load the quadword at 8(%rbp) into %rax.
      * NOTE(review): this is the return-address slot only if the function
      * the template is expanded in maintains a standard %rbp frame --
      * confirm that the kernel is built with frame pointers.
      */
     55 	.inline caller,0
     56 	movq	8(%rbp), %rax
     57 	.end
     58 
     59 /
     60 / convert ipl to spl.  This is the identity function for i86
     61 /
     /*
      * ipltospl: copy the argument in %rdi to %rax unchanged.
      */
     62 	.inline	ipltospl,0
     63 	movq	%rdi, %rax
     64 	.end
     65 
     66 /
     67 / find the low order bit in a word
     68 /
     /*
      * lowbit: return in %rax the 1-based position of the lowest set bit
      * of %rdi (bsfq yields the 0-based index, incq makes it 1-based).
      * %rax is preloaded with -1 so that a zero argument is meant to
      * produce -1 + 1 = 0.
      * NOTE(review): the zero case relies on bsfq leaving its destination
      * unmodified when the source is zero; Intel has historically
      * documented the destination as undefined in that case -- confirm
      * for the supported processors.
      */
     69 	.inline lowbit,4
     70 	movq	$-1, %rax
     71 	bsfq	%rdi, %rax
     72 	incq	%rax
     73 	.end
     74 
     75 /
     76 / Networking byte order functions (too bad, Intel has the wrong byte order)
     77 /
     78 
     /*
      * htonll: copy %rdi to %rax and reverse the order of its eight bytes.
      */
     79 	.inline	htonll,4
     80 	movq	%rdi, %rax
     81 	bswapq	%rax
     82 	.end
     83 
     /*
      * ntohll: same instruction sequence as htonll -- copy %rdi to %rax
      * and reverse the order of its eight bytes.
      */
     84 	.inline	ntohll,4
     85 	movq	%rdi, %rax
     86 	bswapq	%rax
     87 	.end
     88 
     /*
      * htonl: copy %edi to %eax and reverse the order of its four bytes.
      */
     89 	.inline	htonl,4
     90 	movl	%edi, %eax
     91 	bswap	%eax
     92 	.end
     93 
     /*
      * ntohl: same instruction sequence as htonl -- copy %edi to %eax and
      * reverse the order of its four bytes.
      */
     94 	.inline	ntohl,4
     95 	movl	%edi, %eax
     96 	bswap	%eax
     97 	.end
     98 
     /*
      * htons: byte-swap the low 16 bits of %edi.
      * bswap reverses all four bytes, which leaves the swapped low half
      * in the top 16 bits of %eax; the shift right by 16 brings it down
      * to the low 16 bits and clears the upper 16 bits.
      */
     99 	.inline	htons,4
    100 	movl	%edi, %eax
    101 	bswap	%eax
    102 	shrl	$16, %eax
    103 	.end
    104 
    /*
     * ntohs: same instruction sequence as htons -- byte-swap the low 16
     * bits of %edi.  bswap leaves the swapped half in the top 16 bits of
     * %eax; the shift right by 16 moves it down and clears the rest.
     */
    105 	.inline	ntohs,4
    106 	movl	%edi, %eax
    107 	bswap	%eax
    108 	shrl	$16, %eax
    109 	.end
    110 
    111 /*
    112  * multiply two long numbers and yield a u_longlong_t result
    113  * Provided to manipulate hrtime_t values.
    114  */
    115 	/* XX64 These don't work correctly with SOS9 build 13.0 yet
    116 	.inline mul32, 8
    117 	xorl	%edx, %edx
    118 	movl	%edi, %eax
    119 	mull	%esi
    120 	shlq	$32, %rdx
    121 	orq	%rdx, %rax
    122 	ret
    123 	.end
    124 	*/
    125 /*
    126  * Unlock hres_lock and increment the count value. (See clock.h)
    127  */
    /*
     * unlock_hres_lock: perform a lock-prefixed 32-bit increment of the
     * global symbol hres_lock.  Takes no arguments and produces no
     * result register.
     */
    128 	.inline unlock_hres_lock, 0
    129 	lock
    130 	incl	hres_lock
    131 	.end
    132 
    /*
     * atomic_orb: lock-prefixed OR of the low byte of %esi into the byte
     * at (%rdi).  The value is first copied to %eax so that %al can be
     * used as the byte operand; %eax is therefore overwritten.
     */
    133 	.inline	atomic_orb,8
    134 	movl	%esi, %eax
    135 	lock
    136 	orb	%al,(%rdi)
    137 	.end
    138 
    /*
     * atomic_andb: lock-prefixed AND of the low byte of %esi into the
     * byte at (%rdi).  The value is first copied to %eax so that %al can
     * be used as the byte operand; %eax is therefore overwritten.
     */
    139 	.inline	atomic_andb,8
    140 	movl	%esi, %eax
    141 	lock
    142 	andb	%al,(%rdi)
    143 	.end
    144 
    145 /*
    146  * atomic inc/dec operations.
    147  *	void atomic_inc16(uint16_t *addr) { ++*addr; }
    148  *	void atomic_dec16(uint16_t *addr) { --*addr; }
    149  */
    /*
     * atomic_inc16: lock-prefixed increment of the 16-bit word at (%rdi).
     * No result register is set.
     */
    150 	.inline	atomic_inc16,4
    151 	lock
    152 	incw	(%rdi)
    153 	.end
    154 
    /*
     * atomic_dec16: lock-prefixed decrement of the 16-bit word at (%rdi).
     * No result register is set.
     */
    155 	.inline	atomic_dec16,4
    156 	lock
    157 	decw	(%rdi)
    158 	.end
    159 
    160 /*
    161  * atomic bit clear
    162  */
    /*
     * atomic_btr32: atomically clear the bit selected by %esi in the
     * 32-bit operand at (%rdi) and return that bit's previous value
     * (0 or 1) in %eax.
     *
     * setc writes only %al, so %eax is zeroed up front to keep the upper
     * 24 bits of the result from carrying stale register contents.  The
     * xorl must come before the btrl because it overwrites the carry
     * flag that setc reads.
     */
    163 	.inline atomic_btr32,8
        	xorl	%eax, %eax
    164 	lock
    165 	btrl %esi, (%rdi)
    166 	setc %al
    167 	.end
    168 
    169 /*
    170  * Call the pause instruction.  To the Pentium 4 Xeon processor, it acts as
    171  * a hint that the code sequence is a busy spin-wait loop.  Without a pause
    172  * instruction in these loops, the P4 Xeon processor may suffer a severe
    173  * penalty when exiting the loop because the processor detects a possible
    174  * memory order violation.  Inserting the pause instruction significantly
    175  * reduces the likelihood of such a violation, improving performance.
    176  * The pause instruction is a NOP on all other IA-32 processors.
    177  */
    /*
     * ht_pause: emit a single pause instruction.  Takes no arguments and
     * sets no result register.
     */
    178 	.inline ht_pause, 0
    179 	pause
    180 	.end
    181 
    182 /*
    183  * inlines for update_sregs().
    184  */
    /*
     * __set_ds: load the 16-bit selector in %di into the %ds segment
     * register.
     */
    185         .inline __set_ds, 0
    186         movw    %di, %ds
    187         .end
    188 
    /*
     * __set_es: load the 16-bit selector in %di into the %es segment
     * register.
     */
    189         .inline __set_es, 0
    190         movw    %di, %es
    191         .end
    192 
    /*
     * __set_fs: load the 16-bit selector in %di into the %fs segment
     * register.
     */
    193         .inline __set_fs, 0
    194         movw    %di, %fs
    195         .end
    196 
    /*
     * __set_gs: load the 16-bit selector in %di into the %gs segment
     * register.
     */
    197         .inline __set_gs, 0
    198         movw    %di, %gs
    199         .end
    200 
    201 	/*
    202 	 * OPTERON_ERRATUM_88 requires mfence
    203 	 */
    /*
     * __swapgs: execute swapgs, preceded by an mfence.  The mfence is the
     * OPTERON_ERRATUM_88 workaround and must stay immediately ahead of
     * the swapgs.
     */
    204         .inline __swapgs, 0
    205         mfence
    206         swapgs
    207 	.end
    208 
    209 /*
    210  * prefetch 64 bytes
    211  */
    212 
    /*
     * prefetch_read_many: issue prefetcht0 hints for the addresses %rdi
     * and %rdi + 32, two hints 32 bytes apart covering the 64 bytes
     * that start at %rdi.
     */
    213  	.inline	prefetch_read_many,8
    214 	prefetcht0	(%rdi)
    215 	prefetcht0	32(%rdi)
    216 	.end
    217 
    /*
     * prefetch_read_once: issue prefetchnta (non-temporal) hints for the
     * addresses %rdi and %rdi + 32, covering the 64 bytes that start at
     * %rdi.
     */
    218  	.inline	prefetch_read_once,8
    219 	prefetchnta	(%rdi)
    220 	prefetchnta	32(%rdi)
    221 	.end
    222 
    /*
     * prefetch_write_many: issue prefetcht0 hints for the addresses %rdi
     * and %rdi + 32.  This is the same instruction sequence as
     * prefetch_read_many; no write-specific prefetch instruction is used.
     */
    223  	.inline	prefetch_write_many,8
    224 	prefetcht0	(%rdi)
    225 	prefetcht0	32(%rdi)
    226 	.end
    227 
    /*
     * prefetch_write_once: issue prefetcht0 hints for the addresses %rdi
     * and %rdi + 32.  Unlike prefetch_read_once, this uses prefetcht0
     * rather than the non-temporal prefetchnta.
     * NOTE(review): confirm that the temporal hint is intended for the
     * "once" variant.
     */
    228  	.inline	prefetch_write_once,8
    229 	prefetcht0	(%rdi)
    230 	prefetcht0	32(%rdi)
    231 	.end
    232