Home | History | Annotate | Download | only in ml
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /
     28 / Inline functions for i386 kernels.
     29 /	Shared between all x86 platform variants.
     30 /
     31 
     32 /
     33 / return current thread pointer
     34 /
     35 / NOTE: the "0x10" should be replaced by the computed value of the
     36 /	offset of "cpu_thread" from the beginning of the struct cpu.
     37 /	Including "assym.h" does not work, however, since that stuff
     38 /	is PSM-specific and is only visible to the 'unix' build anyway.
     39 /	Same with current cpu pointer, where "0xc" should be replaced
     40 /	by the computed value of the offset of "cpu_self".
     41 /	Ugh -- what a disaster.
     42 /
     43 	.inline	threadp,0
     44 	movl	%gs:0x10, %eax
     45 	.end
     46 
     47 /
     48 / return current cpu pointer
     49 /
     50 	.inline	curcpup,0
     51 	movl	%gs:0xc, %eax
     52 	.end
     53 
     54 /
     55 / return caller
     56 /
     57 	.inline caller,0
     58 	movl	4(%ebp), %eax
     59 	.end
     60 
     61 /
     62 / convert ipl to spl.  This is the identity function for i86
     63 /
     64 	.inline	ipltospl,0
     65 	movl	(%esp), %eax
     66 	.end
     67 
     68 /
     69 / find the low order bit in a word
     70 /
     71 	.inline lowbit,4
     72 	movl	$-1, %eax
     73 	bsfl	(%esp), %eax
     74 	incl	%eax
     75 	.end
     76 
     77 /
     78 / find the high order bit in a word
     79 /
     80 	.inline highbit,4
     81 	movl	$-1, %eax
     82 	bsrl	(%esp), %eax
     83 	incl	%eax
     84 	.end
     85 
     86 /
     87 / Networking byte order functions (too bad, Intel has the wrong byte order)
     88 /
     89 	.inline	htonll,4
     90 	movl	(%esp), %edx
     91 	movl	4(%esp), %eax
     92 	bswap	%edx
     93 	bswap	%eax
     94 	.end
     95 
     96 	.inline	ntohll,4
     97 	movl	(%esp), %edx
     98 	movl	4(%esp), %eax
     99 	bswap	%edx
    100 	bswap	%eax
    101 	.end
    102 
    103 	.inline	htonl,4
    104 	movl	(%esp), %eax
    105 	bswap	%eax
    106 	.end
    107 
    108 	.inline	ntohl,4
    109 	movl	(%esp), %eax
    110 	bswap	%eax
    111 	.end
    112 
    113 	.inline	htons,4
    114 	movl	(%esp), %eax
    115 	bswap	%eax
    116 	shrl	$16, %eax
    117 	.end
    118 
    119 	.inline	ntohs,4
    120 	movl	(%esp), %eax
    121 	bswap	%eax
    122 	shrl	$16, %eax
    123 	.end
    124 
    125 /*
    126  * multiply two long numbers and yield a u_longlong_t result
    127  * Provided to manipulate hrtime_t values.
    128  */
    129 	.inline mul32, 8
    130 	movl	4(%esp), %eax
    131 	movl	(%esp), %ecx
    132 	mull	%ecx
    133 	.end
    134 
    135 /*
    136  * Unlock hres_lock and increment the count value. (See clock.h)
    137  */
    138 	.inline unlock_hres_lock, 0
    139 	lock
    140 	incl	hres_lock
    141 	.end
    142 
    143 	.inline	atomic_orb,8
    144 	movl	(%esp), %eax
    145 	movl    4(%esp), %edx
    146 	lock
    147 	orb	%dl,(%eax)
    148 	.end
    149 
    150 	.inline	atomic_andb,8
    151 	movl	(%esp), %eax
    152 	movl    4(%esp), %edx
    153 	lock
    154 	andb	%dl,(%eax)
    155 	.end
    156 
    157 /*
    158  * atomic inc/dec operations.
    159  *	void atomic_inc16(uint16_t *addr) { ++*addr; }
    160  *	void atomic_dec16(uint16_t *addr) { --*addr; }
    161  */
    162 	.inline	atomic_inc16,4
    163 	movl	(%esp), %eax
    164 	lock
    165 	incw	(%eax)
    166 	.end
    167 
    168 	.inline	atomic_dec16,4
    169 	movl	(%esp), %eax
    170 	lock
    171 	decw	(%eax)
    172 	.end
    173 
    174 /*
    175  * Call the pause instruction.  To the Pentium 4 Xeon processor, it acts as
    176  * a hint that the code sequence is a busy spin-wait loop.  Without a pause
    177  * instruction in these loops, the P4 Xeon processor may suffer a severe
    178  * penalty when exiting the loop because the processor detects a possible
    179  * memory violation.  Inserting the pause instruction significantly reduces
    180  * the likelihood of a memory order violation, improving performance.
    181  * The pause instruction is a NOP on all other IA-32 processors.
    182  */
    183 	.inline ht_pause, 0
    184 	rep			/ our compiler doesn't support "pause" yet,
    185 	nop			/ so we're using "F3 90" opcode directly
    186 	.end
    187 
    188 /*
    189  * prefetch 64 bytes
    190  *
    191  * prefetch is an SSE extension which is not supported on older 32-bit processors
    192  * so define this as a no-op for now
    193  */
    194 
    195  	.inline	prefetch_read_many,4
    196 /	movl		(%esp), %eax
    197 /	prefetcht0	(%eax)
    198 /	prefetcht0	32(%eax)
    199 	.end
    200 
    201  	.inline	prefetch_read_once,4
    202 /	movl		(%esp), %eax
    203 /	prefetchnta	(%eax)
    204 /	prefetchnta	32(%eax)
    205 	.end
    206 
    207  	.inline	prefetch_write_many,4
    208 /	movl		(%esp), %eax
    209 /	prefetcht0	(%eax)
    210 /	prefetcht0	32(%eax)
    211 	.end
    212 
    213  	.inline	prefetch_write_once,4
    214 /	movl		(%esp), %eax
    215 /	prefetcht0	(%eax)
    216 /	prefetcht0	32(%eax)
    217 	.end
    218 
    219